yhavinga committed
Commit c287bf2
1 Parent(s): 3fca42c

Add model and scripts

Files changed (7)
  1. .gitignore +2 -0
  2. config.json +40 -0
  3. run_clm_flax.py +889 -0
  4. run_gpt.sh +33 -0
  5. special_tokens_map.json +1 -0
  6. tokenizer.json +0 -0
  7. tokenizer_config.json +494 -0
.gitignore ADDED
@@ -0,0 +1,2 @@
+ data
+ *~
config.json ADDED
@@ -0,0 +1,40 @@
+ {
+   "_name_or_path": ".",
+   "activation_function": "gelu_new",
+   "architectures": [
+     "GPT2LMHeadModel"
+   ],
+   "attn_pdrop": 0.0,
+   "bos_token_id": 50256,
+   "embd_pdrop": 0.0,
+   "eos_token_id": 50256,
+   "initializer_range": 0.02,
+   "layer_norm_epsilon": 1e-05,
+   "model_type": "gpt2",
+   "n_ctx": 1024,
+   "n_embd": 1024,
+   "n_head": 16,
+   "n_inner": null,
+   "n_layer": 24,
+   "n_positions": 1024,
+   "n_special": 0,
+   "predict_special_tokens": true,
+   "reorder_and_upcast_attn": false,
+   "resid_pdrop": 0.0,
+   "scale_attn_by_inverse_layer_idx": false,
+   "scale_attn_weights": true,
+   "summary_activation": null,
+   "summary_first_dropout": 0.1,
+   "summary_proj_to_labels": true,
+   "summary_type": "cls_index",
+   "summary_use_proj": true,
+   "task_specific_params": {
+     "text-generation": {
+       "do_sample": true,
+       "max_length": 50
+     }
+   },
+   "transformers_version": "4.13.0",
+   "use_cache": true,
+   "vocab_size": 50257
+ }
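This is the standard gpt2-medium geometry (24 layers, hidden size 1024, 16 attention heads, roughly 355M parameters) with all dropout probabilities set to 0.0 for pre-training. Once the trained weights are on the hub, loading them is standard transformers usage; a quick sketch, where the hub id yhavinga/gpt2-medium-dutch is an assumption derived from HF_PROJECT in run_gpt.sh below:

    from transformers import AutoTokenizer, FlaxGPT2LMHeadModel

    repo_id = "yhavinga/gpt2-medium-dutch"  # assumed hub id (HF_PROJECT in run_gpt.sh)
    tokenizer = AutoTokenizer.from_pretrained(repo_id)
    model = FlaxGPT2LMHeadModel.from_pretrained(repo_id)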
run_clm_flax.py ADDED
@@ -0,0 +1,889 @@
+ #!/usr/bin/env python
+ # coding=utf-8
+ # Copyright 2021 The HuggingFace Team All rights reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ """
+ Pre-training/fine-tuning the library models for causal language modeling (GPT, GPT-2, CTRL, ...) on a text file or a dataset.
+
+ Here is the full list of checkpoints on the hub that can be fine-tuned by this script:
+ https://huggingface.co/models?filter=text-generation
+ """
+ # You can also adapt this script for your own causal language modeling task. Pointers for this are left as comments.
+
+ import json
+ import logging
+ import math
+ import os
+ import shutil
+ import sys
+ import time
+ from dataclasses import asdict, dataclass, field
+ from enum import Enum
+ from itertools import chain
+ from pathlib import Path
+ from typing import Callable, Optional
+
+ import datasets
+ import numpy as np
+ from datasets import Dataset, load_dataset
+ from tqdm import tqdm
+
+ import jax
+ import jax.numpy as jnp
+ import optax
+ import transformers
+ from flax import jax_utils, traverse_util
+ from flax.jax_utils import unreplicate
+ from flax.training import train_state
+ from flax.training.checkpoints import save_checkpoint, restore_checkpoint
+ from flax.training.common_utils import get_metrics, onehot, shard, shard_prng_key
+ from flax.serialization import to_bytes, from_bytes
+ from transformers import (
+     CONFIG_MAPPING,
+     FLAX_MODEL_FOR_CAUSAL_LM_MAPPING,
+     AutoConfig,
+     AutoTokenizer,
+     FlaxAutoModelForCausalLM,
+     HfArgumentParser,
+     is_tensorboard_available,
+     set_seed,
+ )
+ from transformers.file_utils import get_full_repo_name
+ from transformers.testing_utils import CaptureLogger
+
+
+ logger = logging.getLogger(__name__)
+
+ MODEL_CONFIG_CLASSES = list(FLAX_MODEL_FOR_CAUSAL_LM_MAPPING.keys())
+ MODEL_TYPES = tuple(conf.model_type for conf in MODEL_CONFIG_CLASSES)
+
+
+ @dataclass
+ class TrainingArguments:
+     output_dir: str = field(
+         metadata={"help": "The output directory where the model predictions and checkpoints will be written."},
+     )
+     overwrite_output_dir: bool = field(
+         default=False,
+         metadata={
+             "help": (
+                 "Overwrite the content of the output directory. "
+                 "Use this to continue training if output_dir points to a checkpoint directory."
+             )
+         },
+     )
+     do_train: bool = field(default=False, metadata={"help": "Whether to run training."})
+     do_eval: bool = field(default=False, metadata={"help": "Whether to run eval on the dev set."})
+     per_device_train_batch_size: int = field(
+         default=8, metadata={"help": "Batch size per GPU/TPU core/CPU for training."}
+     )
+     per_device_eval_batch_size: int = field(
+         default=8, metadata={"help": "Batch size per GPU/TPU core/CPU for evaluation."}
+     )
+     learning_rate: float = field(default=5e-5, metadata={"help": "The initial learning rate for AdamW."})
+     weight_decay: float = field(default=0.0, metadata={"help": "Weight decay for AdamW if we apply some."})
+     adam_beta1: float = field(default=0.9, metadata={"help": "Beta1 for AdamW optimizer"})
+     adam_beta2: float = field(default=0.999, metadata={"help": "Beta2 for AdamW optimizer"})
+     adam_epsilon: float = field(default=1e-8, metadata={"help": "Epsilon for AdamW optimizer."})
+     adafactor: bool = field(default=False, metadata={"help": "Whether or not to replace AdamW by Adafactor."})
+     num_train_epochs: float = field(default=3.0, metadata={"help": "Total number of training epochs to perform."})
+     warmup_steps: int = field(default=0, metadata={"help": "Linear warmup over warmup_steps."})
+     logging_steps: int = field(default=500, metadata={"help": "Log every X updates steps."})
+     save_steps: int = field(default=500, metadata={"help": "Save checkpoint every X updates steps."})
+     eval_steps: Optional[int] = field(default=None, metadata={"help": "Run an evaluation every X steps."})
+     seed: int = field(default=42, metadata={"help": "Random seed that will be set at the beginning of training."})
+     push_to_hub: bool = field(
+         default=False, metadata={"help": "Whether or not to upload the trained model to the model hub after training."}
+     )
+     hub_model_id: str = field(
+         default=None, metadata={"help": "The name of the repository to keep in sync with the local `output_dir`."}
+     )
+     hub_token: str = field(default=None, metadata={"help": "The token to use to push to the Model Hub."})
+
+     def __post_init__(self):
+         if self.output_dir is not None:
+             self.output_dir = os.path.expanduser(self.output_dir)
+
+     def to_dict(self):
+         """
+         Serializes this instance while replacing `Enum` members by their values (for JSON serialization support). It
+         obfuscates token fields by replacing their values with placeholders.
+         """
+         d = asdict(self)
+         for k, v in d.items():
+             if isinstance(v, Enum):
+                 d[k] = v.value
+             if isinstance(v, list) and len(v) > 0 and isinstance(v[0], Enum):
+                 d[k] = [x.value for x in v]
+             if k.endswith("_token"):
+                 d[k] = f"<{k.upper()}>"
+         return d
+
+
+ @dataclass
+ class ModelArguments:
+     """
+     Arguments pertaining to which model/config/tokenizer we are going to fine-tune, or train from scratch.
+     """
+
+     model_name_or_path: Optional[str] = field(
+         default=None,
+         metadata={
+             "help": "The model checkpoint for weights initialization. "
+             "Don't set if you want to train a model from scratch."
+         },
+     )
+     model_type: Optional[str] = field(
+         default=None,
+         metadata={"help": "If training from scratch, pass a model type from the list: " + ", ".join(MODEL_TYPES)},
+     )
+     config_name: Optional[str] = field(
+         default=None, metadata={"help": "Pretrained config name or path if not the same as model_name"}
+     )
+     tokenizer_name: Optional[str] = field(
+         default=None, metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"}
+     )
+     cache_dir: Optional[str] = field(
+         default=None, metadata={"help": "Where do you want to store the pretrained models downloaded from s3"}
+     )
+     use_fast_tokenizer: bool = field(
+         default=True,
+         metadata={"help": "Whether to use one of the fast tokenizers (backed by the tokenizers library) or not."},
+     )
+     dtype: Optional[str] = field(
+         default="float32",
+         metadata={
+             "help": "Floating-point format in which the model weights should be initialized and trained. Choose one of `[float32, float16, bfloat16]`."
+         },
+     )
+
+
+ @dataclass
+ class DataTrainingArguments:
+     """
+     Arguments pertaining to what data we are going to input our model for training and eval.
+     """
+
+     dataset_name: Optional[str] = field(
+         default=None, metadata={"help": "The name of the dataset to use (via the datasets library)."}
+     )
+     dataset_config_name: Optional[str] = field(
+         default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."}
+     )
+     train_file: Optional[str] = field(default=None, metadata={"help": "The input training data file (a text file)."})
+     validation_file: Optional[str] = field(
+         default=None,
+         metadata={"help": "An optional input evaluation data file to evaluate the perplexity on (a text file)."},
+     )
+     max_train_samples: Optional[int] = field(
+         default=None,
+         metadata={
+             "help": "For debugging purposes or quicker training, truncate the number of training examples to this "
+             "value if set."
+         },
+     )
+     max_eval_samples: Optional[int] = field(
+         default=None,
+         metadata={
+             "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this "
+             "value if set."
+         },
+     )
+     overwrite_cache: bool = field(
+         default=False, metadata={"help": "Overwrite the cached training and evaluation sets"}
+     )
+     validation_split_percentage: Optional[int] = field(
+         default=5,
+         metadata={
+             "help": "The percentage of the train set used as validation set in case there's no validation split"
+         },
+     )
+     block_size: Optional[int] = field(
+         default=None,
+         metadata={
+             "help": "Optional input sequence length after tokenization. "
+             "The training dataset will be chunked into blocks of this size for training. "
+             "Defaults to the model's max input length for single-sentence inputs (taking special tokens into account)."
+         },
+     )
+     preprocessing_num_workers: Optional[int] = field(
+         default=None,
+         metadata={"help": "The number of processes to use for the preprocessing."},
+     )
+     keep_linebreaks: bool = field(
+         default=True, metadata={"help": "Whether to keep line breaks when using TXT files or not."}
+     )
+
+     def __post_init__(self):
+         if self.dataset_name is None and self.train_file is None and self.validation_file is None:
+             raise ValueError("Need either a dataset name or a training/validation file.")
+         else:
+             if self.train_file is not None:
+                 extension = self.train_file.split(".")[-1]
+                 assert extension in ["csv", "json", "txt"], "`train_file` should be a csv, a json or a txt file."
+             if self.validation_file is not None:
+                 extension = self.validation_file.split(".")[-1]
+                 assert extension in ["csv", "json", "txt"], "`validation_file` should be a csv, a json or a txt file."
+
+
+ class TrainState(train_state.TrainState):
+     dropout_rng: jnp.ndarray
+
+     def replicate(self):
+         return jax_utils.replicate(self).replace(dropout_rng=shard_prng_key(self.dropout_rng))
+
+
+ def data_loader(rng: jax.random.PRNGKey, dataset: Dataset, batch_size: int, shuffle: bool = False):
+     """
+     Returns batches of size `batch_size` from truncated `dataset`, sharded over all local devices.
+     Shuffles batches if `shuffle` is `True`.
+     """
+     steps_per_epoch = len(dataset) // batch_size
+
+     if shuffle:
+         batch_idx = jax.random.permutation(rng, len(dataset))
+     else:
+         batch_idx = jnp.arange(len(dataset))
+
+     batch_idx = batch_idx[: steps_per_epoch * batch_size]  # Skip incomplete batch.
+     batch_idx = batch_idx.reshape((steps_per_epoch, batch_size))
+
+     for idx in batch_idx:
+         batch = dataset[idx]
+         batch = {k: np.array(v) for k, v in batch.items()}
+
+         yield batch
+
+
+ def write_train_metric(summary_writer, train_metrics, train_time, step):
+     summary_writer.scalar("train_time", train_time, step)
+
+     train_metrics = get_metrics(train_metrics)
+     for key, vals in train_metrics.items():
+         tag = f"train_{key}"
+         for i, val in enumerate(vals):
+             summary_writer.scalar(tag, val, step - len(vals) + i + 1)
+
+
+ def write_eval_metric(summary_writer, eval_metrics, step):
+     for metric_name, value in eval_metrics.items():
+         summary_writer.scalar(f"eval_{metric_name}", value, step)
+
+
+ def create_learning_rate_fn(
+     train_ds_size: int, train_batch_size: int, num_train_epochs: int, num_warmup_steps: int, learning_rate: float
+ ) -> Callable[[int], jnp.array]:
+     """Returns a linear warmup, linear decay learning rate function."""
+     steps_per_epoch = train_ds_size // train_batch_size
+     num_train_steps = steps_per_epoch * num_train_epochs
+     warmup_fn = optax.linear_schedule(init_value=0.0, end_value=learning_rate, transition_steps=num_warmup_steps)
+     decay_fn = optax.linear_schedule(
+         init_value=learning_rate, end_value=0, transition_steps=num_train_steps - num_warmup_steps
+     )
+     schedule_fn = optax.join_schedules(schedules=[warmup_fn, decay_fn], boundaries=[num_warmup_steps])
+     return schedule_fn
+
+
+ # utils
+ def mb_item(x):
+     return x.item() if hasattr(x, "item") else x
+
+
+ # checkpoint functions
+ def save_model_checkpoint(model, save_dir, state, with_opt: bool = True, push_to_hub: bool = False):
+     """
+     If `push_to_hub` is True, saves to `save_dir`. Otherwise saves to `save_dir/ckpt-{step}`.
+     """
+     state = jax_utils.unreplicate(state)
+     logger.info(f"SAVING CHECKPOINT IN {save_dir}...")
+     if not push_to_hub:
+         save_dir = f"{save_dir}/ckpt-{mb_item(state.step) - 1}"
+     model.save_pretrained(
+         save_dir,
+         params=state.params,
+         push_to_hub=push_to_hub,
+         commit_message=f"Saving weights and logs at step {mb_item(state.step) - 1}",
+     )
+     if with_opt:
+         with open(os.path.join(save_dir, "opt_state.msgpack"), "wb") as f:
+             f.write(to_bytes(state.opt_state))
+         with open(os.path.join(save_dir, "training_state.json"), "w") as f:
+             json.dump({"step": state.step.item()}, f)
+     logger.info("checkpoint saved")
+
+
+ # This is added to make resuming from a checkpoint work with Adafactor;
+ # to be removed when the underlying issue is fixed.
+ # Note that the Adafactor state is perturbed by fake_update.
+ def _zeros_tree_like(inp_tree):
+     return jax.tree_map(jnp.zeros_like, inp_tree)
+
+
+ def fake_update(state):
+     fake_updates = _zeros_tree_like(state.params)
+     _, new_inner_opt_state = state.tx.inner_opt.update(fake_updates, state.opt_state.inner_opt_state, state.params)
+     opt_state = state.opt_state
+     new_opt_state = optax.MultiStepsState(
+         mini_step=opt_state.mini_step,
+         gradient_step=opt_state.gradient_step,
+         inner_opt_state=new_inner_opt_state,
+         acc_grads=opt_state.acc_grads,
+     )
+     return state.replace(opt_state=new_opt_state)
+
+
+ def reinstantiate_states(opt_state):
+     new_state = []
+     for state in opt_state:
+         if isinstance(state, list):
+             new_state.append(reinstantiate_states(state))
+         else:
+             cls = getattr(optax, type(state).__name__)
+             new_state.append(cls(**{k: getattr(state, k) for k in state._fields}))
+     return new_state
+
+
+ def restore_model_checkpoint(save_dir, state):
+     logger.info(f"RESTORING CHECKPOINT FROM {save_dir}...")
+     with open(os.path.join(save_dir, "flax_model.msgpack"), "rb") as f:
+         params = from_bytes(state.params, f.read())
+
+     with open(os.path.join(save_dir, "opt_state.msgpack"), "rb") as f:
+         opt_state = from_bytes(state.opt_state, f.read())
+
+     with open(os.path.join(save_dir, "training_state.json"), "r") as f:
+         training_state = json.load(f)
+     step = training_state["step"]
+
+     logger.info("checkpoint restored")
+     # Reinstantiate the inner opt state to avoid a type conflict.
+     if hasattr(opt_state, "inner_opt_state"):
+         logger.info("restoring state of multisteps optimizer")
+         inner_opt_state = reinstantiate_states(opt_state.inner_opt_state)
+         ms_state_dict = {k: getattr(state.opt_state, k) for k in state.opt_state._fields}
+         ms_state_dict["inner_opt_state"] = inner_opt_state
+         opt_state = optax.MultiStepsState(**ms_state_dict)
+
+     return state.replace(step=step, params=params, opt_state=opt_state)
+
+
+ def rotate_checkpoints(ckpt_dir: str, save_total_limit: int):
+     "Removes older checkpoints so that `save_total_limit` checkpoints are kept"
+     # TODO: what to remove is decided using the step number only; we might want to improve that.
+     ckpts = [str(x) for x in Path(ckpt_dir).glob("ckpt-*")]
+     # Sort checkpoints by step.
+     ckpts_sorted = sorted(ckpts, key=lambda x: int(x.split("-")[-1]))
+     ckpts_to_delete = ckpts_sorted[:-save_total_limit]
+     for ckpt in ckpts_to_delete:
+         logger.info(f"Deleting older checkpoint [{ckpt}] due to save_total_limit ({save_total_limit})")
+         shutil.rmtree(ckpt)
+
+
+ def main():
+     # See all possible arguments in src/transformers/training_args.py
+     # or by passing the --help flag to this script.
+     # We now keep distinct sets of args, for a cleaner separation of concerns.
+
+     parser = HfArgumentParser((ModelArguments, DataTrainingArguments, TrainingArguments))
+     if len(sys.argv) == 2 and sys.argv[1].endswith(".json"):
+         # If we pass only one argument to the script and it's the path to a json file,
+         # let's parse it to get our arguments.
+         model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1]))
+     else:
+         model_args, data_args, training_args = parser.parse_args_into_dataclasses()
+
+     if (
+         os.path.exists(training_args.output_dir)
+         and os.listdir(training_args.output_dir)
+         and training_args.do_train
+         and not training_args.overwrite_output_dir
+     ):
+         raise ValueError(
+             f"Output directory ({training_args.output_dir}) already exists and is not empty. "
+             "Use --overwrite_output_dir to overcome."
+         )
+
+     # Make one log on every process with the configuration for debugging.
+     logging.basicConfig(
+         format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
+         datefmt="%m/%d/%Y %H:%M:%S",
+         level=logging.INFO,
+     )
+     # Setup logging: we only want one process per machine to log things on the screen.
+     logger.setLevel(logging.INFO if jax.process_index() == 0 else logging.ERROR)
+     if jax.process_index() == 0:
+         datasets.utils.logging.set_verbosity_warning()
+         transformers.utils.logging.set_verbosity_info()
+     else:
+         datasets.utils.logging.set_verbosity_error()
+         transformers.utils.logging.set_verbosity_error()
+
+     # Set the verbosity of the Transformers logger to info (on the main process only):
+     logger.info(f"Training/evaluation parameters {training_args}")
+
+     # Set seed before initializing model.
+     set_seed(training_args.seed)
+
+     # # Handle the repository creation
+     # if training_args.push_to_hub:
+     #     if training_args.hub_model_id is None:
+     #         repo_name = get_full_repo_name(
+     #             Path(training_args.output_dir).absolute().name, token=training_args.hub_token
+     #         )
+     #     else:
+     #         repo_name = training_args.hub_model_id
+     #     repo = Repository(training_args.output_dir, clone_from=repo_name)
+
+     # Get the datasets: you can either provide your own CSV/JSON/TXT training and evaluation files (see below)
+     # or just provide the name of one of the public datasets available on the hub at https://huggingface.co/datasets/
+     # (the dataset will be downloaded automatically from the datasets Hub).
+     #
+     # For CSV/JSON files, this script will use the column called 'text' or the first column if no column called
+     # 'text' is found. You can easily tweak this behavior (see below).
+     #
+     # In distributed training, the load_dataset function guarantees that only one local process can concurrently
+     # download the dataset.
+     if data_args.dataset_name is not None:
+         # Downloading and loading a dataset from the hub.
+         dataset = load_dataset(
+             data_args.dataset_name, data_args.dataset_config_name, cache_dir=model_args.cache_dir, keep_in_memory=False
+         )
+
+         if "validation" not in dataset.keys():
+             dataset["validation"] = load_dataset(
+                 data_args.dataset_name,
+                 data_args.dataset_config_name,
+                 split=f"train[:{data_args.validation_split_percentage}%]",
+                 cache_dir=model_args.cache_dir,
+             )
+             dataset["train"] = load_dataset(
+                 data_args.dataset_name,
+                 data_args.dataset_config_name,
+                 split=f"train[{data_args.validation_split_percentage}%:]",
+                 cache_dir=model_args.cache_dir,
+             )
+     else:
+         data_files = {}
+         dataset_args = {}
+         if data_args.train_file is not None:
+             data_files["train"] = data_args.train_file
+         if data_args.validation_file is not None:
+             data_files["validation"] = data_args.validation_file
+         extension = data_args.train_file.split(".")[-1]
+         if extension == "txt":
+             extension = "text"
+             dataset_args["keep_linebreaks"] = data_args.keep_linebreaks
+         dataset = load_dataset(extension, data_files=data_files, cache_dir=model_args.cache_dir, **dataset_args)
+
+         if "validation" not in dataset.keys():
+             dataset["validation"] = load_dataset(
+                 extension,
+                 data_files=data_files,
+                 split=f"train[:{data_args.validation_split_percentage}%]",
+                 cache_dir=model_args.cache_dir,
+                 **dataset_args,
+             )
+             dataset["train"] = load_dataset(
+                 extension,
+                 data_files=data_files,
+                 split=f"train[{data_args.validation_split_percentage}%:]",
+                 cache_dir=model_args.cache_dir,
+                 **dataset_args,
+             )
+     # See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at
+     # https://huggingface.co/docs/datasets/loading_datasets.html.
+
+     # Load pretrained model and tokenizer
+
+     # Distributed training:
+     # The .from_pretrained methods guarantee that only one local process can concurrently
+     # download model & vocab.
+     if model_args.config_name:
+         config = AutoConfig.from_pretrained(model_args.config_name, cache_dir=model_args.cache_dir)
+     elif model_args.model_name_or_path:
+         config = AutoConfig.from_pretrained(model_args.model_name_or_path, cache_dir=model_args.cache_dir)
+     else:
+         config = CONFIG_MAPPING[model_args.model_type]()
+         logger.warning("You are instantiating a new config instance from scratch.")
+
+     if model_args.tokenizer_name:
+         tokenizer = AutoTokenizer.from_pretrained(
+             model_args.tokenizer_name, cache_dir=model_args.cache_dir, use_fast=model_args.use_fast_tokenizer
+         )
+     elif model_args.model_name_or_path:
+         tokenizer = AutoTokenizer.from_pretrained(
+             model_args.model_name_or_path, cache_dir=model_args.cache_dir, use_fast=model_args.use_fast_tokenizer
+         )
+     else:
+         raise ValueError(
+             "You are instantiating a new tokenizer from scratch. This is not supported by this script. "
+             "You can do it from another script, save it, and load it from here, using --tokenizer_name."
+         )
+
+     if model_args.model_name_or_path:
+         model = FlaxAutoModelForCausalLM.from_pretrained(
+             model_args.model_name_or_path, config=config, seed=training_args.seed, dtype=getattr(jnp, model_args.dtype)
+         )
+     else:
+         model = FlaxAutoModelForCausalLM.from_config(
+             config, seed=training_args.seed, dtype=getattr(jnp, model_args.dtype)
+         )
+
+     # Preprocessing the datasets.
+     # First we tokenize all the texts.
+     if training_args.do_train:
+         column_names = dataset["train"].column_names
+     else:
+         column_names = dataset["validation"].column_names
+     text_column_name = "text" if "text" in column_names else column_names[0]
+
+     # tokenize_function will be pickled by the Hasher, so force the logger to load
+     # before it is defined to avoid a _LazyModule error.
+     tok_logger = transformers.utils.logging.get_logger("transformers.tokenization_utils_base")
+
+     def tokenize_function(examples):
+         with CaptureLogger(tok_logger) as cl:
+             output = tokenizer(examples[text_column_name])
+         # CLM input could be much, much longer than block_size.
+         if "Token indices sequence length is longer than the" in cl.out:
+             tok_logger.warning(
+                 "^^^^^^^^^^^^^^^^ Please ignore the warning above - this long input will be chunked into smaller bits before being passed to the model."
+             )
+         return output
+
+     tokenized_datasets = dataset.map(
+         tokenize_function,
+         batched=True,
+         num_proc=data_args.preprocessing_num_workers,
+         remove_columns=column_names,
+         load_from_cache_file=not data_args.overwrite_cache,
+     )
+
+     if data_args.block_size is None:
+         block_size = tokenizer.model_max_length
+         if block_size > config.max_position_embeddings:
+             logger.warning(
+                 f"The tokenizer picked seems to have a very large `model_max_length` ({tokenizer.model_max_length}). "
+                 "Picking 1024 instead. You can change that default value by passing --block_size xxx."
+             )
+             block_size = 1024
+     else:
+         if data_args.block_size > tokenizer.model_max_length:
+             logger.warning(
+                 f"The block_size passed ({data_args.block_size}) is larger than the maximum length for the model "
+                 f"({tokenizer.model_max_length}). Using block_size={tokenizer.model_max_length}."
+             )
+         block_size = min(data_args.block_size, tokenizer.model_max_length)
+
+     # Main data processing function that will concatenate all texts from our dataset and generate chunks of block_size.
+     def group_texts(examples):
+         # Concatenate all texts.
+         concatenated_examples = {k: list(chain(*examples[k])) for k in examples.keys()}
+         total_length = len(concatenated_examples[list(examples.keys())[0]])
+         # We drop the small remainder; we could add padding if the model supported it instead of this drop. You can
+         # customize this part to your needs.
+         if total_length >= block_size:
+             total_length = (total_length // block_size) * block_size
+         # Split by chunks of max_len.
+         result = {
+             k: [t[i : i + block_size] for i in range(0, total_length, block_size)]
+             for k, t in concatenated_examples.items()
+         }
+         result["labels"] = result["input_ids"].copy()
+         return result
+
+     # Note that with `batched=True`, this map processes 1,000 texts together, so group_texts throws away a remainder
+     # for each of those groups of 1,000 texts. You can adjust that batch_size here but a higher value might be slower
+     # to preprocess.
+     #
+     # To speed up this part, we use multiprocessing. See the documentation of the map method for more information:
+     # https://huggingface.co/docs/datasets/package_reference/main_classes.html#datasets.Dataset.map
+
+     lm_datasets = tokenized_datasets.map(
+         group_texts,
+         batched=True,
+         num_proc=data_args.preprocessing_num_workers,
+         load_from_cache_file=not data_args.overwrite_cache,
+     )
+
+     if training_args.do_train:
+         if "train" not in tokenized_datasets:
+             raise ValueError("--do_train requires a train dataset")
+         train_dataset = lm_datasets["train"]
+         if data_args.max_train_samples is not None:
+             train_dataset = train_dataset.select(range(data_args.max_train_samples))
+
+     if training_args.do_eval:
+         if "validation" not in tokenized_datasets:
+             raise ValueError("--do_eval requires a validation dataset")
+         eval_dataset = lm_datasets["validation"]
+         if data_args.max_eval_samples is not None:
+             eval_dataset = eval_dataset.select(range(data_args.max_eval_samples))
+
+     # Enable tensorboard only on the master node.
+     has_tensorboard = is_tensorboard_available()
+     if has_tensorboard and jax.process_index() == 0:
+         try:
+             from flax.metrics.tensorboard import SummaryWriter
+
+             summary_writer = SummaryWriter(log_dir=Path(training_args.output_dir + "/runs"))
+         except ImportError as ie:
+             has_tensorboard = False
+             logger.warning(
+                 f"Unable to display metrics through TensorBoard because some packages are not installed: {ie}"
+             )
+     else:
+         logger.warning(
+             "Unable to display metrics through TensorBoard because the package is not installed. "
+             "Please run pip install tensorboard to enable."
+         )
+
+     # Initialize our training
+     rng = jax.random.PRNGKey(training_args.seed)
+     rng, dropout_rng = jax.random.split(rng)
+
+     # Store some constants
+     num_epochs = int(training_args.num_train_epochs)
+     train_batch_size = int(training_args.per_device_train_batch_size) * jax.device_count()
+     eval_batch_size = int(training_args.per_device_eval_batch_size) * jax.device_count()
+     steps_per_epoch = len(train_dataset) // train_batch_size
+     total_train_steps = steps_per_epoch * num_epochs
+
+     # Create learning rate schedule
+     linear_decay_lr_schedule_fn = create_learning_rate_fn(
+         len(train_dataset),
+         train_batch_size,
+         training_args.num_train_epochs,
+         training_args.warmup_steps,
+         training_args.learning_rate,
+     )
+
+     # We use Optax's "masking" functionality to not apply weight decay
+     # to bias and LayerNorm scale parameters. decay_mask_fn returns a
+     # mask boolean with the same structure as the parameters.
+     # The mask is True for parameters that should be decayed.
+     # Note that this mask is specifically adapted for FlaxGPT2.
+     # For other models, one should correct the layer norm parameter naming
+     # accordingly.
+     def decay_mask_fn(params):
+         flat_params = traverse_util.flatten_dict(params)
+         flat_mask = {
+             path: (path[-1] != "bias" and path[-2:] not in [("ln_1", "scale"), ("ln_2", "scale"), ("ln_f", "scale")])
+             for path in flat_params
+         }
+         return traverse_util.unflatten_dict(flat_mask)
+
+     # Create the optimizer (Adafactor or AdamW).
+     if training_args.adafactor:
+         # We use the default parameters here to initialize adafactor.
+         # For more details about the parameters please check https://github.com/deepmind/optax/blob/ed02befef9bf81cbbf236be3d2b0e032e9ed4a40/optax/_src/alias.py#L74
+         optimizer = optax.adafactor(
+             learning_rate=linear_decay_lr_schedule_fn,
+         )
+     else:
+         optimizer = optax.adamw(
+             learning_rate=linear_decay_lr_schedule_fn,
+             b1=training_args.adam_beta1,
+             b2=training_args.adam_beta2,
+             eps=training_args.adam_epsilon,
+             weight_decay=training_args.weight_decay,
+             mask=decay_mask_fn,
+         )
+
+     # Setup train state
+     state = TrainState.create(apply_fn=model.__call__, params=model.params, tx=optimizer, dropout_rng=dropout_rng)
+
+     # if training_args.resume_from_checkpoint:
+     #     state = restore_model_checkpoint(training_args.resume_from_checkpoint, state)
+     #     resume_step = mb_item(state.step)
+     #     if training_args.adafactor:
+     #         state = fake_update(state)
+     # else:
+     resume_step = 0
+
+     def loss_fn(logits, labels):
+         # Shift so that tokens < n predict token n.
+         shift_logits = logits[..., :-1, :]
+         shift_labels = labels[..., 1:]
+         loss = optax.softmax_cross_entropy(shift_logits, onehot(shift_labels, shift_logits.shape[-1]))
+         return loss.mean()
+
+     # Define gradient update step fn
+     def train_step(state, batch):
+         dropout_rng, new_dropout_rng = jax.random.split(state.dropout_rng)
+
+         def compute_loss(params):
+             labels = batch.pop("labels")
+             logits = state.apply_fn(**batch, params=params, dropout_rng=dropout_rng, train=True)[0]
+             loss = loss_fn(logits, labels)
+             return loss
+
+         grad_fn = jax.value_and_grad(compute_loss)
+         loss, grad = grad_fn(state.params)
+         grad = jax.lax.pmean(grad, "batch")
+
+         new_state = state.apply_gradients(grads=grad, dropout_rng=new_dropout_rng)
+
+         metrics = {"loss": loss, "learning_rate": linear_decay_lr_schedule_fn(state.step)}
+         metrics = jax.lax.pmean(metrics, axis_name="batch")
+
+         return new_state, metrics
+
+     # Define eval fn
+     def eval_step(params, batch):
+         labels = batch.pop("labels")
+         logits = model(**batch, params=params, train=False)[0]
+         loss = loss_fn(logits, labels)
+
+         # summarize metrics
+         metrics = {"loss": loss}
+         metrics = jax.lax.pmean(metrics, axis_name="batch")
+         return metrics
+
+     # Create parallel version of the train and eval step
+     p_train_step = jax.pmap(train_step, "batch", donate_argnums=(0,))
+     p_eval_step = jax.pmap(eval_step, "batch")
+
+     # Replicate the train state on each device
+     state = state.replicate()
+
+     logger.info("***** Running training *****")
+     logger.info(f"  Num examples = {len(train_dataset)}")
+     logger.info(f"  Num Epochs = {num_epochs}")
+     logger.info(f"  Num tokenized group examples = {len(tokenized_datasets['train'])}")
+     logger.info(f"  Instantaneous batch size per device = {training_args.per_device_train_batch_size}")
+     logger.info(f"  Total train batch size (w. parallel & distributed) = {train_batch_size}")
+     logger.info(f"  Total optimization steps = {total_train_steps}")
+
+     train_time = 0
+     train_metrics = []
+     resume_epoch = resume_step // steps_per_epoch
+     epochs = tqdm(range(num_epochs), desc=f"Epoch ... ({resume_epoch + 1}/{num_epochs})", position=0)
+     if resume_step != 0:
+         logger.info(f"Skipping to epoch {resume_epoch} step {resume_step}")
+     for epoch in epochs:
+         # ======================== Training ================================
+         if epoch < resume_epoch:
+             continue
+
+         train_start = time.time()
+
+         # Create sampling rng
+         rng, input_rng = jax.random.split(rng)
+
+         # Generate an epoch by shuffling sampling indices from the train dataset
+         train_loader = data_loader(input_rng, train_dataset, train_batch_size, shuffle=True)
+         steps_per_epoch = len(train_dataset) // train_batch_size
+         # train
+         for step in tqdm(range(steps_per_epoch), desc="Training...", position=1, leave=False):
+             cur_step = epoch * (len(train_dataset) // train_batch_size) + step
+             # Skip to the step from which we are resuming.
+             if cur_step < resume_step:
+                 continue
+
+             batch = next(train_loader)
+             batch = shard(batch)
+             state, train_metric = p_train_step(state, batch)
+             train_metrics.append(train_metric)
+
+             if cur_step % training_args.logging_steps == 0 and cur_step > 0:
+                 # Save metrics
+                 train_metric = unreplicate(train_metric)
+                 train_time += time.time() - train_start
+                 if has_tensorboard and jax.process_index() == 0:
+                     write_train_metric(summary_writer, train_metrics, train_time, cur_step)
+
+                 epochs.write(
+                     f"Step... ({cur_step} | Loss: {train_metric['loss'].mean()}, Learning Rate: {train_metric['learning_rate'].mean()})"
+                 )
+
+                 train_metrics = []
+
+             if training_args.eval_steps is not None and cur_step % training_args.eval_steps == 0 and cur_step > 0:
+                 # ======================== Evaluating ==============================
+                 eval_metrics = []
+                 eval_loader = data_loader(input_rng, eval_dataset, eval_batch_size)
+                 eval_steps = len(eval_dataset) // eval_batch_size
+                 for _ in tqdm(range(eval_steps), desc="Evaluating...", position=2, leave=False):
+                     # Model forward
+                     batch = next(eval_loader)
+                     batch = shard(batch)
+                     metrics = p_eval_step(state.params, batch)
+                     eval_metrics.append(metrics)
+
+                 # normalize eval metrics
+                 eval_metrics = get_metrics(eval_metrics)
+                 eval_metrics = jax.tree_map(jnp.mean, eval_metrics)
+
+                 try:
+                     eval_metrics["perplexity"] = math.exp(eval_metrics["loss"])
+                 except OverflowError:
+                     eval_metrics["perplexity"] = float("inf")
+
+                 # Print metrics and update progress bar
+                 desc = f"Step... ({cur_step} | Eval Loss: {eval_metrics['loss']} | Eval Perplexity: {eval_metrics['perplexity']})"
+                 epochs.write(desc)
+                 epochs.desc = desc
+
+                 # Save metrics
+                 if has_tensorboard and jax.process_index() == 0:
+                     write_eval_metric(summary_writer, eval_metrics, cur_step)
+
+             if cur_step % training_args.save_steps == 0 and cur_step > 0:
+                 # Save a checkpoint every save_steps and optionally push it to the hub.
+                 if jax.process_index() == 0:
+                     save_model_checkpoint(
+                         model, training_args.output_dir, state, with_opt=False, push_to_hub=training_args.push_to_hub
+                     )
+                     # params = jax.device_get(unreplicate(state.params))
+                     # model.save_pretrained(training_args.output_dir, params=params)
+                     # tokenizer.save_pretrained(training_args.output_dir)
+                     # if training_args.push_to_hub:
+                     #     repo.push_to_hub(commit_message=f"Saving weights and logs of step {cur_step}", blocking=False)
+
+     # Eval after training
+     if training_args.do_eval:
+         eval_metrics = []
+         eval_loader = data_loader(input_rng, eval_dataset, eval_batch_size)
+         eval_steps = len(eval_dataset) // eval_batch_size
+         for _ in tqdm(range(eval_steps), desc="Evaluating...", position=2, leave=False):
+             # Model forward
+             batch = shard(next(eval_loader))
+             metrics = p_eval_step(state.params, batch)
+             eval_metrics.append(metrics)
+
+         # normalize eval metrics
+         eval_metrics = get_metrics(eval_metrics)
+         eval_metrics = jax.tree_map(lambda x: jnp.mean(x).item(), eval_metrics)
+
+         try:
+             eval_metrics["perplexity"] = math.exp(eval_metrics["loss"])
+         except OverflowError:
+             eval_metrics["perplexity"] = float("inf")
+
+         if jax.process_index() == 0:
+             eval_metrics = {f"eval_{metric_name}": value for metric_name, value in eval_metrics.items()}
+             path = os.path.join(training_args.output_dir, "eval_results.json")
+             with open(path, "w") as f:
+                 json.dump(eval_metrics, f, indent=4, sort_keys=True)
+
+     # Save the model after training is over.
+     if jax.process_index() == 0:
+         save_model_checkpoint(
+             model, training_args.output_dir, state, with_opt=False, push_to_hub=training_args.push_to_hub
+         )
+
+
+ if __name__ == "__main__":
+     main()
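A note on resuming: restore_model_checkpoint, fake_update and rotate_checkpoints above are only reachable through the commented-out resume block in main(), which reads training_args.resume_from_checkpoint, a field the trimmed TrainingArguments dataclass in this script does not define. A minimal sketch of the missing field, should that path be re-enabled (an assumption, not part of this commit):

    # Hypothetical addition to TrainingArguments; not defined in this commit.
    resume_from_checkpoint: Optional[str] = field(
        default=None,
        metadata={"help": "Directory of a `ckpt-{step}` checkpoint to resume training from."},
    )

With the field in place, uncommenting the block restores the parameters, optimizer state and step counter written by save_model_checkpoint(..., with_opt=True); the fake_update() call then works around the Adafactor state-restoration issue noted in the comments above.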
run_gpt.sh ADDED
@@ -0,0 +1,33 @@
+ #!/bin/bash
+
+ export HF_PROJECT="gpt2-medium-dutch"
+
+ # Variables for training the tokenizer and creating the config
+ export VOCAB_SIZE="50257"
+ export DATASET="yhavinga/mc4_nl_cleaned"        # Name of the dataset in the Huggingface Hub
+ export DATASET_CONFIG="full"                    # Config of the dataset in the Huggingface Hub
+ export DATASET_SPLIT="train"                    # Split to use for training tokenizer and model
+ export TEXT_FIELD="text"                        # Field containing the text to be used for training
+ export CONFIG_TYPE="gpt2-medium"                # Config that our model will use
+ export MODEL_PATH="${HOME}/data/${HF_PROJECT}"  # Path to the model, e.g. here inside the mount
+
+ python run_clm_flax.py \
+     --output_dir="${MODEL_PATH}" \
+     --model_type="gpt2" \
+     --config_name="${MODEL_PATH}" \
+     --tokenizer_name="${MODEL_PATH}" \
+     --preprocessing_num_workers="96" \
+     --do_train --do_eval \
+     --dataset_name="${DATASET}" \
+     --dataset_config_name="${DATASET_CONFIG}" \
+     --block_size="512" \
+     --per_device_train_batch_size="16" \
+     --per_device_eval_batch_size="16" \
+     --learning_rate="8e-4" --warmup_steps="5000" \
+     --adam_beta1="0.9" --adam_beta2="0.98" --weight_decay="0.01" \
+     --overwrite_output_dir \
+     --num_train_epochs="4" \
+     --logging_steps="500" \
+     --save_steps="40000" \
+     --eval_steps="2500" \
+     --push_to_hub
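The variables VOCAB_SIZE, DATASET_SPLIT, TEXT_FIELD and CONFIG_TYPE are exported "for training the tokenizer and creating the config", but the command above only consumes MODEL_PATH, DATASET and DATASET_CONFIG, so the tokenizer and config at ${MODEL_PATH} must be prepared beforehand by scripts outside this commit. A minimal sketch of that preparation using standard tokenizers/transformers APIs (the exact procedure used for this model is an assumption):

    from datasets import load_dataset
    from tokenizers import ByteLevelBPETokenizer
    from transformers import AutoConfig

    model_dir = "data/gpt2-medium-dutch"  # corresponds to MODEL_PATH

    # Load the training split; for a real run over all of mc4_nl_cleaned
    # you would likely sample or stream rather than load it whole.
    dataset = load_dataset("yhavinga/mc4_nl_cleaned", "full", split="train")

    def batch_iterator(batch_size=1000):
        # Feed batches of the text field (TEXT_FIELD) to the BPE trainer.
        for i in range(0, len(dataset), batch_size):
            yield dataset[i : i + batch_size]["text"]

    tokenizer = ByteLevelBPETokenizer()
    tokenizer.train_from_iterator(
        batch_iterator(),
        vocab_size=50257,  # VOCAB_SIZE
        special_tokens=["<|endoftext|>"],
    )
    tokenizer.save(f"{model_dir}/tokenizer.json")

    # Reuse the gpt2-medium architecture (CONFIG_TYPE), with dropout disabled
    # as in the config.json above.
    config = AutoConfig.from_pretrained("gpt2-medium", resid_pdrop=0.0, embd_pdrop=0.0, attn_pdrop=0.0)
    config.save_pretrained(model_dir)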
special_tokens_map.json ADDED
@@ -0,0 +1 @@
+ {"bos_token": "<|endoftext|>", "eos_token": "<|endoftext|>", "unk_token": "<|endoftext|>"}
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
tokenizer_config.json ADDED
@@ -0,0 +1,494 @@
191
+ <div class="font-medium leading-tight">Expert Support
192
+ <p class="text-sm font-normal text-gray-400 whitespace-nowrap">Accelerate your ML roadmap</p>
193
+ </div></a>
194
+ </li><li><a href="/inference-api" data-ga-category="header-menu" data-ga-action="clicked inference api" data-ga-label="accelerated inference" class="flex items-center group hover:bg-gradient-to-r hover:from-gray-100 p-2 w-80 dark:hover:from-gray-800"><div class="h-9 w-9 bg-gradient-to-tr dark:bg-gray-800 bg-gray-100 group-hover:bg-white dark:group-hover:bg-black rounded mr-1.5 flex items-center justify-center flex-none"><svg class="text-lg text-gray-500 group-hover:text-gray-600 dark:group-hover:text-gray-400" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M8 9H4a2 2 0 0 0-2 2v12h2v-5h4v5h2V11a2 2 0 0 0-2-2zm-4 7v-5h4v5z" fill="currentColor"></path><path d="M22 11h3v10h-3v2h8v-2h-3V11h3V9h-8v2z" fill="currentColor"></path><path d="M14 23h-2V9h6a2 2 0 0 1 2 2v5a2 2 0 0 1-2 2h-4zm0-7h4v-5h-4z" fill="currentColor"></path></svg></div>
195
+ <div class="font-medium leading-tight">Inference API
196
+ <p class="text-sm font-normal text-gray-400 whitespace-nowrap">Use +20k models via API calls</p>
197
+ </div></a>
198
+ </li><li><a href="/autonlp" data-ga-category="header-menu" data-ga-action="clicked autonlp" data-ga-label="autonlp" class="flex items-center group hover:bg-gradient-to-r hover:from-gray-100 p-2 w-80 dark:hover:from-gray-800"><div class="h-9 w-9 bg-gradient-to-tr dark:bg-gray-800 bg-gray-100 group-hover:bg-white dark:group-hover:bg-black rounded mr-1.5 flex items-center justify-center flex-none"><svg class="text-lg text-gray-500 group-hover:text-gray-600 dark:group-hover:text-gray-400" viewBox="0 0 327 270" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" style="transform: rotate(360deg);"><path fill-rule="evenodd" clip-rule="evenodd" d="M51.963 85.696L51.9843 85.7142L52.0059 85.732C55.1982 88.3543 59.2834 89.5397 63.9988 89.5397H78.4986V69.3054H70.5681V40.5348C70.5681 30.8859 67.5907 23.2435 61.2359 18.1491C55.13 13.0873 46.7983 10.7302 36.5895 10.7302C28.2498 10.7302 21.1616 12.1163 15.4724 15.0602C10.0168 17.8391 5.83986 21.4966 3.12193 26.0961L2.00182 27.9917L17.1258 41.4352L18.8589 39.0419C20.6131 36.6193 22.6796 34.6836 25.06 33.2061C27.1956 31.8806 30.1823 31.1037 34.2242 31.1037C38.6379 31.1037 41.027 32.1553 42.2298 33.5299L42.2688 33.5745L42.3098 33.6174C43.6986 35.0693 44.6292 37.486 44.6292 41.3696V42.3135H35.7547C24.9484 42.3135 16.3129 44.149 10.2104 48.1829C3.87027 52.3738 0.802246 58.8507 0.802246 67.1092C0.802246 74.4698 3.09622 80.537 7.93618 84.9467L7.95908 84.9676L7.98247 84.9879C12.8409 89.2126 19.125 91.2093 26.5719 91.2093C32.519 91.2093 37.7242 89.9194 42.0438 87.1839C44.6415 85.5735 46.7327 83.5117 48.3183 81.0336C49.2175 82.8133 50.4256 84.3783 51.963 85.696ZM49.1349 74.4024C49.1268 74.3369 49.119 74.2711 49.1115 74.205C49.1158 74.2426 49.1201 74.2801 49.1246 74.3175C49.128 74.3458 49.1314 74.3741 49.1349 74.4024ZM42.0076 70.2099C40.0307 71.601 37.5325 72.3663 34.3634 72.3663C31.4475 72.3663 29.4241 71.7802 28.0549 70.8674C26.9105 70.1045 26.3237 69.0722 26.3237 67.3875V64.6048C26.3237 62.5484 27.028 61.3156 28.304 60.4603C29.7696 59.4779 32.3054 58.7913 36.3112 58.7913H44.6292V64.744C44.6292 67.3052 43.7283 68.999 42.0076 70.2099ZM101.033 89.3317L101.067 89.346L101.102 89.3594C104.318 90.6044 107.803 91.2093 111.528 91.2093C117.713 91.2093 122.939 89.6592 126.894 86.2606C128.359 85.0311 129.675 83.697 130.838 82.2601V89.5397H156.776V12.3998H130.838V62.2396C130.838 63.6201 130.55 64.6767 130.077 65.5045C129.516 66.4857 128.783 67.3276 127.859 68.041C127.056 68.6406 126.032 69.1518 124.739 69.5376C123.49 69.8452 122.196 70.001 120.85 70.001C117.332 70.001 115.151 69.0073 113.789 67.4414C112.364 65.8044 111.419 63.0712 111.419 58.7612V12.3998H85.4801V61.5439C85.4801 66.125 86.0642 70.2695 87.2869 73.9374L87.2973 73.9689L87.3086 74C88.6115 77.608 90.3932 80.7401 92.6858 83.3453C94.9963 85.9709 97.7881 87.9709 101.033 89.3317ZM166.262 12.3998V33.7473H185.741V66.1353C185.741 72.7418 187.742 78.3845 191.881 82.8418C196.179 87.4702 202.536 89.5397 210.397 89.5397H239.645V68.1923H211.68V33.7473H239.645V12.3998H211.68V0.359375H187.41V6.58626C187.41 9.65572 186.762 10.8926 186.317 11.3055C185.766 11.8168 184.435 12.3998 181.597 12.3998H166.262ZM272.247 88.5173L272.27 88.5262L272.293 88.5347C277.091 90.3342 282.425 91.2093 288.258 91.2093C294.09 91.2093 299.424 90.3342 304.223 88.5347L304.246 88.5262L304.268 88.5173C309.051 86.6239 313.139 83.9115 316.486 80.3663C319.921 76.8262 322.52 72.5541 324.304 67.5974C326.096 62.6209 326.967 
57.0662 326.967 50.9698C326.967 44.8733 326.096 39.3187 324.304 34.3421C322.52 29.3855 319.921 25.1134 316.486 21.5733C313.135 18.0231 309.038 15.3534 304.246 13.5525C299.439 11.6538 294.097 10.7302 288.258 10.7302C282.418 10.7302 277.076 11.6538 272.27 13.5525C267.477 15.3534 263.342 18.0202 259.905 21.5585L259.891 21.5728L259.877 21.5873C256.539 25.1276 253.992 29.3954 252.211 34.3421C250.42 39.3187 249.549 44.8733 249.549 50.9698C249.549 57.0662 250.42 62.6209 252.211 67.5974C253.992 72.5442 256.539 76.812 259.877 80.3523L259.891 80.3668L259.905 80.3811C263.337 83.9145 267.464 86.6239 272.247 88.5173ZM297.126 67.1259C295.135 69.3301 292.301 70.5576 288.258 70.5576C284.214 70.5576 281.38 69.3301 279.389 67.1259C277.376 64.8968 276.183 61.5428 276.183 56.6742V45.2653C276.183 40.3968 277.376 37.0428 279.389 34.8136C281.38 32.6095 284.214 31.382 288.258 31.382C292.301 31.382 295.135 32.6095 297.126 34.8136C299.14 37.0428 300.332 40.3968 300.332 45.2653V56.6742C300.332 61.5428 299.14 64.8968 297.126 67.1259Z" fill="currentColor"></path><path fill-rule="evenodd" clip-rule="evenodd" d="M24.1832 151.837C24.1832 149.255 26.2767 147.161 28.8594 147.161H73.3409C73.3027 147.546 73.2833 147.936 73.2833 148.33V185.74C73.2833 192.196 78.5171 197.43 84.9737 197.43C91.4303 197.43 96.6642 192.196 96.6642 185.74V148.33C96.6642 147.936 96.6448 147.546 96.6066 147.161H117.511C116.885 148.593 116.538 150.175 116.538 151.837V246.53H65.0999V202.107C65.0999 195.65 59.866 190.416 53.4094 190.416C46.9528 190.416 41.7189 195.65 41.7189 202.107V246.53H28.8594C26.2767 246.53 24.1832 244.437 24.1832 241.854V151.837ZM139.919 246.53H234.873C234.702 245.778 234.612 244.996 234.612 244.192C234.612 238.381 229.901 233.671 224.091 233.671H193.695C183.365 233.671 174.99 225.296 174.99 214.966V147.161H138.946C139.572 148.593 139.919 150.175 139.919 151.837V246.53ZM298.91 246.53H257.458C257.805 245.423 257.993 244.245 257.993 243.023V228.995C257.993 218.664 249.619 210.29 239.288 210.29H217.076C206.746 210.29 198.371 201.915 198.371 191.585V147.161H298.91C301.492 147.161 303.586 149.255 303.586 151.837V241.854C303.586 244.437 301.492 246.53 298.91 246.53ZM28.8594 123.78C13.3638 123.78 0.802246 136.342 0.802246 151.837V241.854C0.802246 257.35 13.3638 269.911 28.8594 269.911H298.91C314.405 269.911 326.967 257.35 326.967 241.854V151.837C326.967 136.342 314.405 123.78 298.91 123.78H28.8594ZM240.457 162.359C231.418 162.359 224.091 169.686 224.091 178.726C224.091 187.765 231.418 195.092 240.457 195.092C249.496 195.092 256.824 187.765 256.824 178.726C256.824 169.686 249.496 162.359 240.457 162.359Z" fill="currentColor"></path></svg></div>
199
+ <div class="font-medium leading-tight">AutoNLP
200
+ <p class="text-sm font-normal text-gray-400 whitespace-nowrap">Create ML models without code</p>
201
+ </div></a>
202
+ </li><li><a href="/infinity" data-ga-category="header-menu" data-ga-action="clicked infinity" data-ga-label="infinity" class="flex items-center group hover:bg-gradient-to-r hover:from-gray-100 p-2 w-80 dark:hover:from-gray-800"><div class="h-9 w-9 bg-gradient-to-tr dark:bg-gray-800 bg-gray-100 group-hover:bg-white dark:group-hover:bg-black rounded mr-1.5 flex items-center justify-center flex-none"><svg class="text-lg text-gray-500 group-hover:text-gray-600 dark:group-hover:text-gray-400" width="1em" height="1em" viewBox="0 0 349 155" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img"><path fill-rule="evenodd" clip-rule="evenodd" d="M77.4254 42.0939C58.0156 42.0939 42.2809 57.799 42.2809 77.1722C42.2809 96.5454 58.0156 112.25 77.4254 112.25V154.344C34.7239 154.344 0.107422 119.793 0.107422 77.1722C0.107422 34.5512 34.7239 0 77.4254 0C116.684 0 144.788 19.3459 167.187 40.3015L137.675 70.504C118.96 53.1048 101.389 42.0939 77.4254 42.0939ZM181.033 114.057C203.574 135.043 231.897 154.344 271.531 154.344V112.25C247.156 112.25 229.306 101.201 210.542 83.8571L181.033 114.057Z" fill="currentColor" opacity="0.5"></path><path fill-rule="evenodd" clip-rule="evenodd" d="M271.141 42.0939C290.551 42.0939 306.286 57.799 306.286 77.1722C306.286 96.5454 290.551 112.25 271.141 112.25V112.304C270.876 112.294 270.609 112.289 270.34 112.289C258.72 112.289 249.3 121.709 249.3 133.329C249.3 144.949 258.72 154.369 270.34 154.369C270.685 154.369 271.027 154.36 271.368 154.344C313.965 154.222 348.459 119.718 348.459 77.1722C348.459 34.5512 313.843 0 271.141 0C219.197 0 186.78 33.8682 161.269 60.5213C160.594 61.227 159.923 61.9276 159.257 62.6224C131.402 91.6825 110.291 112.25 77.0352 112.25V112.289C77.0352 112.289 77.0352 112.289 77.0351 112.289C65.4151 112.289 55.9951 121.709 55.9951 133.329C55.9951 144.949 65.4151 154.369 77.0351 154.369C77.4121 154.369 77.7867 154.359 78.1587 154.339C130.221 153.858 162.646 120.001 188.168 93.3526L188.213 93.306L188.262 93.255C188.754 92.7412 189.243 92.2301 189.73 91.722C217.708 62.5338 238.492 42.0939 271.141 42.0939Z" fill="currentColor"></path></svg></div>
203
+ <div class="font-medium leading-tight">Infinity
204
+ <p class="text-sm font-normal text-gray-400 whitespace-nowrap">Optimize to 1ms latency</p>
205
+ </div></a>
206
+ </li><li><a href="/hardware" data-ga-category="header-menu" data-ga-action="clicked hardware" data-ga-label="hardware" class="flex items-center group hover:bg-gradient-to-r hover:from-gray-100 p-2 w-80 dark:hover:from-gray-800"><div class="h-9 w-9 bg-gradient-to-tr dark:bg-gray-800 bg-gray-100 group-hover:bg-white dark:group-hover:bg-black rounded mr-1.5 flex items-center justify-center flex-none"><svg class="text-lg text-gray-500 group-hover:text-gray-600 dark:group-hover:text-gray-400" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 230 230"><path fill-rule="evenodd" clip-rule="evenodd" d="M196.384 70.1626V77.0991C196.378 80.4596 195.489 83.7596 193.808 86.6691C192.126 89.5785 189.71 91.9955 186.801 93.6783L123.359 130.287C120.724 131.532 117.839 132.184 114.926 132.203V106.1L193.797 60.4834C195.474 63.4351 196.365 66.7701 196.384 70.1626ZM196.384 111.562V118.499C196.378 121.859 195.489 125.159 193.808 128.068C192.126 130.978 189.71 133.395 186.801 135.078L123.359 171.686C120.724 172.932 117.839 173.584 114.926 173.603V147.499L193.797 101.883C195.474 104.834 196.365 108.169 196.384 111.562ZM196.384 159.897V152.96C196.365 149.568 195.474 146.233 193.797 143.281L114.926 188.898V215.001C117.839 214.982 120.724 214.33 123.359 213.084L186.801 176.476C189.71 174.793 192.126 172.376 193.808 169.467C195.489 166.557 196.378 163.257 196.384 159.897Z" fill="currentColor"></path><path opacity="0.25" fill-rule="evenodd" clip-rule="evenodd" d="M193.605 60.4489L160.32 79.7003L186.897 95.0442C189.676 96.7021 191.985 99.05 193.605 101.848L160.32 121.099L186.897 136.443C189.676 138.101 191.985 140.448 193.605 143.247L114.734 188.863L35.8633 143.247C37.4824 140.444 39.7917 138.101 42.5716 136.443L44.3827 135.397L106.355 171.226C109.073 172.546 112.054 173.234 115.075 173.239V147.499L36.2047 101.883C36.1814 101.924 36.1583 101.965 36.1353 102.006L35.8633 101.848C37.4824 99.0452 39.7917 96.7029 42.5716 95.0442L44.3836 93.9981L106.355 129.827C109.073 131.147 112.054 131.835 115.075 131.839V106.1L36.2047 60.4834C36.1814 60.5243 36.1583 60.5652 36.1353 60.6063L35.8633 60.4489C37.4824 57.6458 39.7917 55.3035 42.5716 53.6448L105.151 17.5156C111.092 14.1615 118.376 14.1615 124.317 17.5156L186.897 53.6448C189.676 55.3027 191.985 57.6506 193.605 60.4489Z" fill="currentColor"></path><path opacity="0.5" fill-rule="evenodd" clip-rule="evenodd" d="M115.075 131.435V105.696L36.2047 60.0791C34.5242 63.0293 33.633 66.3631 33.6172 69.7583V76.3307C33.6235 79.6912 34.512 82.9912 36.1938 85.9006C37.8755 88.8101 40.2917 91.227 43.2005 92.9098L106.355 129.422C109.073 130.742 112.054 131.43 115.075 131.435ZM115.075 147.499V173.239C112.054 173.234 109.073 172.546 106.355 171.226L43.2005 134.714C40.2917 133.031 37.8755 130.614 36.1938 127.704C34.512 124.795 33.6235 121.495 33.6172 118.134V111.562C33.633 108.167 34.5242 104.833 36.2047 101.883L115.075 147.499ZM115.075 188.898V214.637C112.054 214.633 109.073 213.945 106.355 212.624L43.2005 176.112C40.2917 174.429 37.8755 172.012 36.1938 169.103C34.512 166.193 33.6235 162.893 33.6172 159.533V152.96C33.633 149.565 34.5242 146.231 36.2047 143.281L115.075 188.898Z" fill="currentColor"></path></svg></div>
207
+ <div class="font-medium leading-tight">Hardware
208
+ <p class="text-sm font-normal text-gray-400 whitespace-nowrap">Scale with dedicated hardware</p>
209
+ </div></a>
210
+ </li><li><a href="/platform" data-ga-category="header-menu" data-ga-action="clicked platform" data-ga-label="platform" class="flex items-center group hover:bg-gradient-to-r hover:from-gray-100 p-2 w-80 dark:hover:from-gray-800"><div class="h-9 w-9 bg-gradient-to-tr dark:bg-gray-800 bg-gray-100 group-hover:bg-white dark:group-hover:bg-black rounded mr-1.5 flex items-center justify-center flex-none"><svg class="text-lg text-gray-500 group-hover:text-gray-600 dark:group-hover:text-gray-400" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 24 24"><path class="uim-quaternary" d="M20.23 7.24L12 12L3.77 7.24a1.98 1.98 0 0 1 .7-.71L11 2.76c.62-.35 1.38-.35 2 0l6.53 3.77c.29.173.531.418.7.71z" opacity=".25" fill="currentColor"></path><path class="uim-tertiary" d="M12 12v9.5a2.09 2.09 0 0 1-.91-.21L4.5 17.48a2.003 2.003 0 0 1-1-1.73v-7.5a2.06 2.06 0 0 1 .27-1.01L12 12z" opacity=".5" fill="currentColor"></path><path class="uim-primary" d="M20.5 8.25v7.5a2.003 2.003 0 0 1-1 1.73l-6.62 3.82c-.275.13-.576.198-.88.2V12l8.23-4.76c.175.308.268.656.27 1.01z" fill="currentColor"></path></svg></div>
211
+ <div class="font-medium leading-tight">Platform
212
+ <p class="text-sm font-normal text-gray-400 whitespace-nowrap">Collaborate better on ML</p>
213
+ </div></a>
214
+ </li></ul></li>
215
+ </ul></div>
216
+ </div></li>
217
+
218
+ <li><a class="flex items-center group px-2 py-0.5 hover:text-gray-500 dark:hover:text-gray-400" href="/pricing" data-ga-category="header-menu" data-ga-action="clicked pricing" data-ga-label="pricing">Pricing
219
+ </a></li>
220
+ <li><hr class="mx-2 w-0.5 h-5 border-none bg-gray-100 dark:bg-gray-800"></li>
221
+ <li><a class="px-2 py-0.5 block cursor-pointer hover:text-gray-500 dark:hover:text-gray-400" href="/login">Log In
222
+ </a></li>
223
+ <li><a class="ml-2 btn" href="/join">Sign Up </a></li></ul></nav></div></header>
224
+
225
+
226
+ <main class="flex flex-col flex-1 "><header class="bg-gradient-to-t from-gray-50-to-white via-white dark:via-gray-950 pt-10 "><div class="container relative"><h1 class="flex items-center flex-wrap text-lg leading-tight mb-2 md:text-xl ">
227
+ <div class="flex items-center mb-1"><img class="w-4 h-4 mr-1.5 rounded" alt="Flax Community's picture" src="https://aeiljuispo.cloudimg.io/v7/https://s3.amazonaws.com/moonup/production/uploads/1624969772076-5dfcb1aada6d0311fd3d5448.jpeg?w=200&amp;h=200&amp;f=face">
228
+ <a href="/flax-community" class="font-sans text-gray-400 hover:text-blue-600">flax-community</a>
229
+ <div class="text-gray-300 mx-0.5">/</div></div>
230
+ <div class="mb-1"><a class="font-mono font-semibold" href="/flax-community/gpt2-medium-indonesian">gpt2-medium-indonesian</a>
231
+ <div class="SVELTE_HYDRATER inline mr-5" data-props="{&quot;label&quot;:&quot;objectInfo name&quot;,&quot;noText&quot;:true,&quot;value&quot;:&quot;flax-community/gpt2-medium-indonesian&quot;}" data-target="CopyButton"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none
232
+
233
+ mx-0.5
234
+ text-gray-600
235
+
236
+ " title="Copy objectInfo name to clipboard" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg>
237
+
238
+ <div class="
239
+ absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow
240
+ left-1/2 top-full transform -translate-x-1/2 translate-y-2
241
+ opacity-0
242
+ "><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="
243
+ border-left-color: transparent;
244
+ border-right-color: transparent;
245
+ "></div>
246
+ Copied</div></button></div></div>
247
+ <div class="SVELTE_HYDRATER mr-5 mb-1" data-props="{&quot;isLikedByUser&quot;:false,&quot;likes&quot;:0,&quot;repoId&quot;:&quot;flax-community/gpt2-medium-indonesian&quot;,&quot;repoType&quot;:&quot;model&quot;}" data-target="LikeButton"><div class="inline-flex items-center border leading-none whitespace-nowrap text-sm rounded-md text-gray-500 overflow-hidden bg-white
248
+ "><button class="relative flex items-center px-1.5 py-1 hover:bg-gradient-to-t focus:outline-none from-red-50 to-transparent dark:from-red-900 dark:to-red-800 overflow-hidden" title="Like"><svg class="mr-1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32" fill="currentColor"><path d="M22.45,6a5.47,5.47,0,0,1,3.91,1.64,5.7,5.7,0,0,1,0,8L16,26.13,5.64,15.64a5.7,5.7,0,0,1,0-8,5.48,5.48,0,0,1,7.82,0L16,10.24l2.53-2.58A5.44,5.44,0,0,1,22.45,6m0-2a7.47,7.47,0,0,0-5.34,2.24L16,7.36,14.89,6.24a7.49,7.49,0,0,0-10.68,0,7.72,7.72,0,0,0,0,10.82L16,29,27.79,17.06a7.72,7.72,0,0,0,0-10.82A7.49,7.49,0,0,0,22.45,4Z"></path></svg>
249
+
250
+ <svg class="mr-1 absolute text-red-500 origin-center transform transition ease-in\n\t\t\t\ttranslate-y-10 scale-0" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32" fill="currentColor"><path d="M22.5,4c-2,0-3.9,0.8-5.3,2.2L16,7.4l-1.1-1.1C12,3.3,7.2,3.3,4.3,6.2c0,0-0.1,0.1-0.1,0.1c-3,3-3,7.8,0,10.8L16,29l11.8-11.9c3-3,3-7.8,0-10.8C26.4,4.8,24.5,4,22.5,4z"></path></svg>
251
+ like
252
+ </button>
253
+ <button class="flex items-center px-1.5 py-1 border-l text-gray-400 focus:outline-none hover:bg-gray-50 dark:hover:bg-gray-700 focus:bg-gray-100 " title="See users who liked this repository">0</button></div>
254
+ </div>
255
+ </h1>
256
+ <div class="SVELTE_HYDRATER " data-props="{&quot;tagObjs&quot;:[{&quot;id&quot;:&quot;text-generation&quot;,&quot;label&quot;:&quot;Text Generation&quot;,&quot;subType&quot;:&quot;nlp&quot;,&quot;type&quot;:&quot;pipeline_tag&quot;},{&quot;id&quot;:&quot;pytorch&quot;,&quot;label&quot;:&quot;PyTorch&quot;,&quot;type&quot;:&quot;library&quot;},{&quot;id&quot;:&quot;jax&quot;,&quot;label&quot;:&quot;JAX&quot;,&quot;type&quot;:&quot;library&quot;},{&quot;id&quot;:&quot;tensorboard&quot;,&quot;label&quot;:&quot;TensorBoard&quot;,&quot;type&quot;:&quot;library&quot;},{&quot;id&quot;:&quot;transformers&quot;,&quot;label&quot;:&quot;Transformers&quot;,&quot;type&quot;:&quot;library&quot;},{&quot;id&quot;:&quot;id&quot;,&quot;label&quot;:&quot;id&quot;,&quot;type&quot;:&quot;language&quot;},{&quot;id&quot;:&quot;gpt2&quot;,&quot;label&quot;:&quot;gpt2&quot;,&quot;type&quot;:&quot;other&quot;}]}" data-target="ModelHeaderTags"><div class="flex flex-wrap mb-3 lg:mb-5"><a class="tag
257
+ tag-white" href="/models?pipeline_tag=text-generation">
258
+ <div class="tag-ico tag-ico-indigo"><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 18 18"><path d="M16.2607 8.08202L14.468 6.28928C14.3063 6.12804 14.0873 6.03749 13.859 6.03749C13.6307 6.03749 13.4117 6.12804 13.25 6.28928L5.6375 13.904V16.9125H8.64607L16.2607 9.30002C16.422 9.13836 16.5125 8.91935 16.5125 8.69102C16.5125 8.4627 16.422 8.24369 16.2607 8.08202V8.08202ZM8.1953 15.825H6.725V14.3547L11.858 9.22118L13.3288 10.6915L8.1953 15.825ZM14.0982 9.92262L12.6279 8.45232L13.8606 7.21964L15.3309 8.68994L14.0982 9.92262Z"></path><path d="M6.18125 9.84373H7.26875V6.03748H8.9V4.94998H4.55V6.03748H6.18125V9.84373Z"></path><path d="M4.55 11.475H2.375V2.775H11.075V4.95H12.1625V2.775C12.1625 2.48658 12.0479 2.20997 11.844 2.00602C11.64 1.80208 11.3634 1.6875 11.075 1.6875H2.375C2.08658 1.6875 1.80997 1.80208 1.60602 2.00602C1.40207 2.20997 1.2875 2.48658 1.2875 2.775V11.475C1.2875 11.7634 1.40207 12.04 1.60602 12.244C1.80997 12.4479 2.08658 12.5625 2.375 12.5625H4.55V11.475Z"></path></svg></div>
259
+ <span>Text Generation</span>
260
+ </a><a class="tag
261
+ tag-red" href="/models?library=pytorch"><svg class="inline-block ml-2 text-sm" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width="0.83em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 310"><path d="M218.281 90.106c50.292 50.292 50.292 130.969 0 181.61c-49.244 50.292-130.27 50.292-180.562 0s-50.292-131.318 0-181.61L127.825 0v45.053l-8.382 8.382l-59.721 59.722c-37.72 37.02-37.72 97.79 0 135.509c37.02 37.719 97.79 37.719 135.509 0c37.719-37.02 37.719-97.79 0-135.51l23.05-23.05zm-45.053-5.588c-9.259 0-16.764-7.505-16.764-16.764c0-9.258 7.505-16.764 16.764-16.764c9.258 0 16.764 7.506 16.764 16.764c0 9.259-7.506 16.764-16.764 16.764z" fill="#EE4C2C"></path></svg>
262
+ <span>PyTorch</span>
263
+ </a><a class="tag
264
+ tag-red" href="/models?library=jax"><svg class="inline-block ml-2 text-sm" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width="1.73em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 451 260.81"><style>.J {
265
+ stroke: #dce0df;
266
+ }
267
+ .K {
268
+ stroke-linejoin: round;
269
+ }
270
+ </style><g fill="#5e97f6" class="J K"><path d="M50.5 130.4l-25 43.31h50l25-43.31h-50z"></path><path d="M.5 217.01l25-43.3h50l-25 43.3H.5z"></path><path d="M125.5 173.71h-50l-25 43.3h50l25-43.3z"></path><path d="M175.5 173.71h-50l-25 43.3h50l25-43.3z"></path><path d="M150.5 130.4l-25 43.31h50l25-43.31h-50z"></path><path d="M175.5 87.1l-25 43.3h50l25-43.3h-50z"></path><path d="M200.5 43.8l-25 43.3h50l25-43.3h-50z"></path><path d="M225.5.5l-25 43.3h50l25-43.3h-50z"></path></g><g fill="#2a56c6" class="J K"><path d="M.5 217.01l25 43.3h50l-25-43.3H.5z"></path><path d="M125.5 260.31h-50l-25-43.3h50l25 43.3z"></path><path d="M175.5 260.31h-50l-25-43.3h50l25 43.3z"></path></g><g fill="#00796b" class="J K"><path d="M200.5 217.01l-25-43.3-25 43.3 25 43.3 25-43.3zm50-86.61l-25-43.3-25 43.3h50z"></path><path d="M250.5 43.8l-25 43.3 25 43.3 25-43.3-25-43.3z"></path></g><path d="M125.5 173.71l-25-43.31-25 43.31h50z" fill="#3367d6" class="J K"></path><g fill="#26a69a" class="J K"><path d="M250.5 130.4h-50l-25 43.31h50l25-43.31z"></path><path d="M300.5 130.4h-50l-25 43.31h50l25-43.31z"></path></g><g fill="#9c27b0" class="J K"><path d="M350.5 43.8L325.5.5l-25 43.3 25 43.3 25-43.3z"></path><path d="M375.5 87.1l-25-43.3-25 43.3 25 43.3 25-43.3z"></path><path d="M400.5 130.4l-25-43.3-25 43.3 25 43.31 25-43.31z"></path><path d="M425.5 173.71l-25-43.31-25 43.31 25 43.3 25-43.3z"></path><path d="M450.5 217.01l-25-43.3-25 43.3 25 43.3 25-43.3zM425.5.5l-25 43.3 25 43.3 25-43.3-25-43.3z"></path><path d="M375.5 87.1l25-43.3 25 43.3-25 43.3-25-43.3zm-25 43.3l-25 43.31 25 43.3 25-43.3-25-43.31z"></path><path d="M325.5 260.31l-25-43.3 25-43.3 25 43.3-25 43.3z"></path></g><path d="M275.5 260.31l-25-43.3h50l25 43.3h-50z" fill="#6a1b9a" class="J K"></path><g fill="#00695c" class="J K"><path d="M225.5 173.71h-50l25 43.3h50l-25-43.3z"></path><path d="M275.5 173.71h-50l25 43.3 25-43.3zm0-86.61l25 43.3h50l-25-43.3h-50z"></path><path d="M300.5 43.8h-50l25 43.3h50l-25-43.3zm125 216.51l-25-43.3h-50l25 43.3h50z"></path><path d="M375.5 173.71l-25 43.3h50l-25-43.3z"></path></g><g fill="#ea80fc" class="J K"><path d="M325.5.5h-50l-25 43.3h50l25-43.3zm0 173.21h-50l-25 43.3h50l25-43.3z"></path><path d="M350.5 130.4h-50l-25 43.31h50l25-43.31zM425.5.5h-50l-25 43.3h50l25-43.3z"></path><path d="M375.5 87.1l-25-43.3h50l-25 43.3z"></path></g></svg>
271
+ <span>JAX</span>
272
+ </a><a class="tag
273
+ tag-red" href="/models?library=tensorboard">
274
+ <span>TensorBoard</span>
275
+ </a><a class="tag
276
+ tag-red" href="/models?library=transformers">
277
+ <span>Transformers</span>
278
+ </a><a class="tag
279
+ tag-green" href="/models?language=id">
280
+ <span>id</span>
281
+ </a><a class="tag
282
+ tag-purple" href="/models?other=gpt2">
283
+ <span>gpt2</span>
284
+ </a></div></div>
285
+ <div class="border-b border-gray-100"><div class="flex flex-col-reverse lg:flex-row lg:items-center lg:justify-between"><div class="flex items-center h-12 -mb-px overflow-x-auto overflow-y-hidden"><a class="tab-alternate " href="/flax-community/gpt2-medium-indonesian"><svg class="mr-1.5 text-gray-400" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 24 24"><path class="uim-quaternary" d="M20.23 7.24L12 12L3.77 7.24a1.98 1.98 0 0 1 .7-.71L11 2.76c.62-.35 1.38-.35 2 0l6.53 3.77c.29.173.531.418.7.71z" opacity=".25" fill="currentColor"></path><path class="uim-tertiary" d="M12 12v9.5a2.09 2.09 0 0 1-.91-.21L4.5 17.48a2.003 2.003 0 0 1-1-1.73v-7.5a2.06 2.06 0 0 1 .27-1.01L12 12z" opacity=".5" fill="currentColor"></path><path class="uim-primary" d="M20.5 8.25v7.5a2.003 2.003 0 0 1-1 1.73l-6.62 3.82c-.275.13-.576.198-.88.2V12l8.23-4.76c.175.308.268.656.27 1.01z" fill="currentColor"></path></svg>
286
+ Model card
287
+ </a>
288
+ <a class="tab-alternate active" href="/flax-community/gpt2-medium-indonesian/tree/main"><svg class="mr-1.5 text-gray-400" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 24 24"><path class="uim-tertiary" d="M21 19h-8a1 1 0 0 1 0-2h8a1 1 0 0 1 0 2zm0-4h-8a1 1 0 0 1 0-2h8a1 1 0 0 1 0 2zm0-8h-8a1 1 0 0 1 0-2h8a1 1 0 0 1 0 2zm0 4h-8a1 1 0 0 1 0-2h8a1 1 0 0 1 0 2z" opacity=".5" fill="currentColor"></path><path class="uim-primary" d="M9 19a1 1 0 0 1-1-1V6a1 1 0 0 1 2 0v12a1 1 0 0 1-1 1zm-6-4.333a1 1 0 0 1-.64-1.769L3.438 12l-1.078-.898a1 1 0 0 1 1.28-1.538l2 1.667a1 1 0 0 1 0 1.538l-2 1.667a.999.999 0 0 1-.64.231z" fill="currentColor"></path></svg>
289
+ <span class="xl:hidden">Files</span>
290
+ <span class="hidden xl:inline">Files and versions</span></a>
291
+ <a class="tab-alternate " href="/flax-community/gpt2-medium-indonesian/tensorboard"><svg class="mr-1.5 text-gray-300 dark:text-gray-500 w-3" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid" viewBox="0 0 256 287"><path d="M133.446809,0 L256,69.7191489 L256,133.446809 L182.468085,90.4170213 L182.468085,122.553191 L218.961702,143.795745 L219.506383,198.808511 L182.468085,177.565957 L182.468085,258.178723 L133.446809,286.502128 L133.446809,0 Z M122.553191,0 L122.553191,286.502128 L73.5319149,258.178723 L73.5319149,90.4170213 L0,133.446809 L0,69.7191489 L122.553191,0 Z" fill="currentColor"></path></svg>
292
+ <span class="xl:hidden">Metrics</span>
293
+ <span class="hidden xl:inline">Training metrics</span></a>
294
+ </div>
295
+ <div class="SVELTE_HYDRATER " data-props="{&quot;model&quot;:{&quot;author&quot;:&quot;flax-community&quot;,&quot;branch&quot;:&quot;main&quot;,&quot;cardData&quot;:{&quot;language&quot;:&quot;id&quot;,&quot;widget&quot;:[{&quot;text&quot;:&quot;Sewindu sudah kita tak berjumpa, rinduku padamu sudah tak terkira.&quot;}]},&quot;cardExists&quot;:true,&quot;config&quot;:{&quot;architectures&quot;:[&quot;GPT2LMHeadModel&quot;],&quot;model_type&quot;:&quot;gpt2&quot;,&quot;task_specific_params&quot;:{&quot;text-generation&quot;:{&quot;do_sample&quot;:true,&quot;max_length&quot;:50}}},&quot;id&quot;:&quot;flax-community/gpt2-medium-indonesian&quot;,&quot;lastModified&quot;:&quot;2021-09-02T12:22:45.000Z&quot;,&quot;pipeline_tag&quot;:&quot;text-generation&quot;,&quot;library_name&quot;:&quot;transformers&quot;,&quot;model-index&quot;:null,&quot;private&quot;:false,&quot;gated&quot;:false,&quot;pwcLink&quot;:{&quot;error&quot;:&quot;Unknown error, can&#39;t generate link to Papers With Code.&quot;},&quot;siblings&quot;:[{&quot;rfilename&quot;:&quot;.gitattributes&quot;},{&quot;rfilename&quot;:&quot;.gitignore&quot;},{&quot;rfilename&quot;:&quot;README.md&quot;},{&quot;rfilename&quot;:&quot;added_tokens.json&quot;},{&quot;rfilename&quot;:&quot;config.json&quot;},{&quot;rfilename&quot;:&quot;create_config.py&quot;},{&quot;rfilename&quot;:&quot;create_tokenizer.py&quot;},{&quot;rfilename&quot;:&quot;events.out.tfevents.1625840127.t1v-n-528d9406-w-0.245719.3.v2&quot;},{&quot;rfilename&quot;:&quot;events.out.tfevents.1625843003.t1v-n-528d9406-w-0.250031.3.v2&quot;},{&quot;rfilename&quot;:&quot;events.out.tfevents.1625892207.t1v-n-528d9406-w-0.296755.3.v2&quot;},{&quot;rfilename&quot;:&quot;flax_model.msgpack&quot;},{&quot;rfilename&quot;:&quot;jax2torch.py&quot;},{&quot;rfilename&quot;:&quot;merges.txt&quot;},{&quot;rfilename&quot;:&quot;pytorch_model.bin&quot;},{&quot;rfilename&quot;:&quot;replace_token_script.py&quot;},{&quot;rfilename&quot;:&quot;run_clm_flax.py&quot;},{&quot;rfilename&quot;:&quot;run_finetuning.sh&quot;},{&quot;rfilename&quot;:&quot;run_pretraining.sh&quot;},{&quot;rfilename&quot;:&quot;special_tokens_map.json&quot;},{&quot;rfilename&quot;:&quot;tokenizer.json&quot;},{&quot;rfilename&quot;:&quot;tokenizer_config.json&quot;},{&quot;rfilename&quot;:&quot;vocab.json&quot;},{&quot;rfilename&quot;:&quot;text_collection/README.md&quot;},{&quot;rfilename&quot;:&quot;text_collection/text_collection.py&quot;}],&quot;tags&quot;:[&quot;pytorch&quot;,&quot;jax&quot;,&quot;tensorboard&quot;,&quot;gpt2&quot;,&quot;text-generation&quot;,&quot;id&quot;,&quot;transformers&quot;],&quot;tag_objs&quot;:[{&quot;id&quot;:&quot;text-generation&quot;,&quot;label&quot;:&quot;Text 
Generation&quot;,&quot;subType&quot;:&quot;nlp&quot;,&quot;type&quot;:&quot;pipeline_tag&quot;},{&quot;id&quot;:&quot;pytorch&quot;,&quot;label&quot;:&quot;PyTorch&quot;,&quot;type&quot;:&quot;library&quot;},{&quot;id&quot;:&quot;jax&quot;,&quot;label&quot;:&quot;JAX&quot;,&quot;type&quot;:&quot;library&quot;},{&quot;id&quot;:&quot;tensorboard&quot;,&quot;label&quot;:&quot;TensorBoard&quot;,&quot;type&quot;:&quot;library&quot;},{&quot;id&quot;:&quot;transformers&quot;,&quot;label&quot;:&quot;Transformers&quot;,&quot;type&quot;:&quot;library&quot;},{&quot;id&quot;:&quot;id&quot;,&quot;label&quot;:&quot;id&quot;,&quot;type&quot;:&quot;language&quot;},{&quot;id&quot;:&quot;gpt2&quot;,&quot;label&quot;:&quot;gpt2&quot;,&quot;type&quot;:&quot;other&quot;}],&quot;transformersInfo&quot;:{&quot;auto_model&quot;:&quot;AutoModelForCausalLM&quot;,&quot;pipeline_tag&quot;:&quot;text-generation&quot;,&quot;processor&quot;:&quot;AutoTokenizer&quot;},&quot;widgetData&quot;:[{&quot;text&quot;:&quot;Sewindu sudah kita tak berjumpa, rinduku padamu sudah tak terkira.&quot;}],&quot;likes&quot;:0,&quot;isLikedByUser&quot;:false}}" data-target="ModelHeaderActions">
296
+
297
+
298
+ <div class="relative mb-1.5 space-y-1 sm:flex sm:space-y-0 sm:space-x-1.5 lg:mb-0"><div><div class="relative ">
299
+ <button class="
300
+ text-sm btn
301
+ cursor-pointer w-full btn text-sm
302
+ " type="button">
303
+ <svg class="mr-1.5 " xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M12.1 2a9.8 9.8 0 0 0-5.4 1.6l6.4 6.4a2.1 2.1 0 0 1 .2 3a2.1 2.1 0 0 1-3-.2L3.7 6.4A9.84 9.84 0 0 0 2 12.1a10.14 10.14 0 0 0 10.1 10.1a10.9 10.9 0 0 0 2.6-.3l6.7 6.7a5 5 0 0 0 7.1-7.1l-6.7-6.7a10.9 10.9 0 0 0 .3-2.6A10 10 0 0 0 12.1 2zm8 10.1a7.61 7.61 0 0 1-.3 2.1l-.3 1.1l.8.8l6.7 6.7a2.88 2.88 0 0 1 .9 2.1A2.72 2.72 0 0 1 27 27a2.9 2.9 0 0 1-4.2 0l-6.7-6.7l-.8-.8l-1.1.3a7.61 7.61 0 0 1-2.1.3a8.27 8.27 0 0 1-5.7-2.3A7.63 7.63 0 0 1 4 12.1a8.33 8.33 0 0 1 .3-2.2l4.4 4.4a4.14 4.14 0 0 0 5.9.2a4.14 4.14 0 0 0-.2-5.9L10 4.2a6.45 6.45 0 0 1 2-.3a8.27 8.27 0 0 1 5.7 2.3a8.49 8.49 0 0 1 2.4 5.9z" fill="currentColor"></path></svg>
304
+ Train
305
+ <svg class="-mr-1 text-gray-500" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 24 24" style="transform: rotate(360deg);"><path d="M7 10l5 5l5-5z" fill="currentColor"></path></svg></button>
306
+
307
+
308
+
309
+ </div>
310
+ </div>
311
+ <div><div class="relative ">
312
+ <button class="
313
+ text-sm btn
314
+ cursor-pointer w-full btn text-sm
315
+ " type="button">
316
+ <svg class="mr-1.5 " xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><rect x="6.34" y="19" width="11.31" height="2" transform="translate(-10.63 14.34) rotate(-45)"></rect><path d="M17,30a1,1,0,0,1-.37-.07,1,1,0,0,1-.62-.79l-1-7,2-.28.75,5.27L21,24.52V17a1,1,0,0,1,.29-.71l4.07-4.07A8.94,8.94,0,0,0,28,5.86V4H26.14a8.94,8.94,0,0,0-6.36,2.64l-4.07,4.07A1,1,0,0,1,15,11H7.48L4.87,14.26l5.27.75-.28,2-7-1a1,1,0,0,1-.79-.62,1,1,0,0,1,.15-1l4-5A1,1,0,0,1,7,9h7.59l3.77-3.78A10.92,10.92,0,0,1,26.14,2H28a2,2,0,0,1,2,2V5.86a10.92,10.92,0,0,1-3.22,7.78L23,17.41V25a1,1,0,0,1-.38.78l-5,4A1,1,0,0,1,17,30Z"></path></svg>
317
+ Deploy
318
+ <svg class="-mr-1 text-gray-500" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 24 24" style="transform: rotate(360deg);"><path d="M7 10l5 5l5-5z" fill="currentColor"></path></svg></button>
319
+
320
+
321
+
322
+ </div>
323
+ </div>
324
+ <div class=" "><button class="cursor-pointer w-full
325
+
326
+ btn
327
+ text-sm" type="button" ><svg class="mr-1.5 " xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32" style="transform: rotate(360deg);"><path d="M31 16l-7 7l-1.41-1.41L28.17 16l-5.58-5.59L24 9l7 7z" fill="currentColor"></path><path d="M1 16l7-7l1.41 1.41L3.83 16l5.58 5.59L8 23l-7-7z" fill="currentColor"></path><path d="M12.419 25.484L17.639 6l1.932.518L14.35 26z" fill="currentColor"></path></svg>
328
+ Use in Transformers</button>
329
+ </div></div></div>
330
+ </div></div></div></header>
331
+
332
+ <div class="container relative flex flex-col md:grid md:space-y-0 w-full
333
+ md:grid-cols-12
334
+
335
+ space-y-4
336
+ md:gap-6
337
+ mb-16
338
+ "><section class="pt-8 border-gray-100 col-span-full"><header class="pb-2 flex items-center justify-between flex-wrap"><div class="flex flex-wrap items-center"><div class="relative mr-4 mb-2 v2-dropdown">
339
+ <button class="
340
+ text-base
341
+ cursor-pointer w-full btn text-sm
342
+ v2-dropdown-button" type="button">
343
+ <svg class="mr-1.5 text-gray-700 dark:text-gray-400" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 24 24" style="transform: rotate(360deg);"><path d="M13 14c-3.36 0-4.46 1.35-4.82 2.24C9.25 16.7 10 17.76 10 19a3 3 0 0 1-3 3a3 3 0 0 1-3-3c0-1.31.83-2.42 2-2.83V7.83A2.99 2.99 0 0 1 4 5a3 3 0 0 1 3-3a3 3 0 0 1 3 3c0 1.31-.83 2.42-2 2.83v5.29c.88-.65 2.16-1.12 4-1.12c2.67 0 3.56-1.34 3.85-2.23A3.006 3.006 0 0 1 14 7a3 3 0 0 1 3-3a3 3 0 0 1 3 3c0 1.34-.88 2.5-2.09 2.86C17.65 11.29 16.68 14 13 14m-6 4a1 1 0 0 0-1 1a1 1 0 0 0 1 1a1 1 0 0 0 1-1a1 1 0 0 0-1-1M7 4a1 1 0 0 0-1 1a1 1 0 0 0 1 1a1 1 0 0 0 1-1a1 1 0 0 0-1-1m10 2a1 1 0 0 0-1 1a1 1 0 0 0 1 1a1 1 0 0 0 1-1a1 1 0 0 0-1-1z" fill="currentColor"></path></svg>
344
+ main
345
+ <svg class="-mr-1 text-gray-500" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 24 24" style="transform: rotate(360deg);"><path d="M7 10l5 5l5-5z" fill="currentColor"></path></svg></button>
346
+
347
+
348
+ <div class="absolute top-full mt-1 min-w-full w-auto bg-white rounded-xl overflow-hidden shadow-lg z-10 border border-gray-100
349
+ left-0
350
+ v2-dropdown-menu hidden"><ul class="min-w-full w-auto">
351
+ <li><ul><li><a class="flex items-center hover:bg-gray-50 dark:hover:bg-gray-800 cursor-pointer px-3 py-1.5 whitespace-nowrap
352
+
353
+
354
+ v2-dropdown-entry" href="/flax-community/gpt2-medium-indonesian/blob/main/tokenizer_config.json">
355
+
356
+ main</a></li></ul></li>
357
+ <li><ul class="bg-yellow-50"></ul></li>
358
+ </ul></div>
359
+ </div>
360
+ <div class="flex items-center flex-wrap mb-2"><a class="hover:underline text-gray-800" href="/flax-community/gpt2-medium-indonesian/tree/main">gpt2-medium-indonesian</a>
361
+ <span class="text-gray-300 mx-1 font-light">/</span>
362
+ <span class="font-light dark:text-gray-300">tokenizer_config.json</span>
363
+
364
+ </div></div>
365
+
366
+ <div class="flex flex-row items-center mb-2">
367
+ </div></header>
368
+ <div class="border border-b-0 dark:border-gray-800 px-3 py-2 flex items-baseline rounded-t-lg bg-gradient-to-t from-gray-100-to-white"><img class="w-4 h-4 rounded-full mt-0.5 mr-2.5 self-center" alt="cahya's picture" src="https://aeiljuispo.cloudimg.io/v7/https://s3.amazonaws.com/moonup/production/uploads/1597134725486-5e4d607b37cb5b49818287c0.jpeg?w=200&amp;h=200&amp;f=face">
369
+ <div class="mr-5 truncate flex items-center flex-none"><a class="hover:underline" href="/cahya">cahya
370
+ </a>
371
+
372
+ </div>
373
+ <a class="mr-4 font-mono text-sm text-gray-500 truncate hover:underline" href="/flax-community/gpt2-medium-indonesian/commit/e7934b783a74f9bac5a7cb05ec98cda450f46f7e">add tokenizers files</a>
374
+ <a class="text-sm border dark:border-gray-800 px-1.5 rounded bg-gray-50 dark:bg-gray-900 hover:underline" href="/flax-community/gpt2-medium-indonesian/commit/e7934b783a74f9bac5a7cb05ec98cda450f46f7e">e7934b7</a>
375
+
376
+
377
+ <time class="ml-auto hidden lg:block text-gray-500 dark:text-gray-400 truncate flex-none pl-2" datetime="2021-07-10T05:40:02" title="Sat, 10 Jul 2021 05:40:02 GMT">5 months ago</time></div>
378
+ <div class="flex flex-wrap items-center justify-between px-3 py-1.5 border dark:border-gray-800 text-sm text-gray-800 dark:bg-gray-900"><div class="flex flex-wrap items-center"><a class="flex items-center hover:underline my-1 mr-4" href="/flax-community/gpt2-medium-indonesian/raw/main/tokenizer_config.json"><svg class="mr-1.5" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32" style="transform: rotate(360deg);"><path d="M31 16l-7 7l-1.41-1.41L28.17 16l-5.58-5.59L24 9l7 7z" fill="currentColor"></path><path d="M1 16l7-7l1.41 1.41L3.83 16l5.58 5.59L8 23l-7-7z" fill="currentColor"></path><path d="M12.419 25.484L17.639 6l1.932.518L14.35 26z" fill="currentColor"></path></svg>
379
+ raw
380
+ </a><a class="flex items-center hover:underline my-1 mr-4" href="/flax-community/gpt2-medium-indonesian/commits/main/tokenizer_config.json"><svg class="mr-1.5" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32" style="transform: rotate(360deg);"><path d="M16 4C9.383 4 4 9.383 4 16s5.383 12 12 12s12-5.383 12-12S22.617 4 16 4zm0 2c5.535 0 10 4.465 10 10s-4.465 10-10 10S6 21.535 6 16S10.465 6 16 6zm-1 2v9h7v-2h-5V8z" fill="currentColor"></path></svg>
381
+ history
382
+ </a><a class="flex items-center hover:underline my-1 mr-4" href="/flax-community/gpt2-medium-indonesian/blame/main/tokenizer_config.json"><svg class="mr-1.5" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32" style="transform: rotate(360deg);"><path d="M16 2a14 14 0 1 0 14 14A14 14 0 0 0 16 2zm0 26a12 12 0 1 1 12-12a12 12 0 0 1-12 12z" fill="currentColor"></path><path d="M11.5 11a2.5 2.5 0 1 0 2.5 2.5a2.48 2.48 0 0 0-2.5-2.5z" fill="currentColor"></path><path d="M20.5 11a2.5 2.5 0 1 0 2.5 2.5a2.48 2.48 0 0 0-2.5-2.5z" fill="currentColor"></path></svg>
383
+ blame
384
+ </a></div>
385
+ <div class="dark:text-gray-300">207 Bytes</div></div>
386
+
387
+ <div class="border border-t-0 rounded-b-lg dark:bg-gray-925 dark:border-gray-800 leading-tight"><div class="py-3"><div class="SVELTE_HYDRATER " data-props="{&quot;lines&quot;:[&quot;{&lt;span class=\\&quot;hljs-attr\\&quot;&gt;&amp;quot;unk_token&amp;quot;&lt;/span&gt;: &lt;span class=\\&quot;hljs-string\\&quot;&gt;&amp;quot;&amp;lt;|endoftext|&amp;gt;&amp;quot;&lt;/span&gt;, &lt;span class=\\&quot;hljs-attr\\&quot;&gt;&amp;quot;bos_token&amp;quot;&lt;/span&gt;: &lt;span class=\\&quot;hljs-string\\&quot;&gt;&amp;quot;&amp;lt;|endoftext|&amp;gt;&amp;quot;&lt;/span&gt;, &lt;span class=\\&quot;hljs-attr\\&quot;&gt;&amp;quot;eos_token&amp;quot;&lt;/span&gt;: &lt;span class=\\&quot;hljs-string\\&quot;&gt;&amp;quot;&amp;lt;|endoftext|&amp;gt;&amp;quot;&lt;/span&gt;, &lt;span class=\\&quot;hljs-attr\\&quot;&gt;&amp;quot;add_prefix_space&amp;quot;&lt;/span&gt;: &lt;span class=\\&quot;hljs-literal\\&quot;&gt;false&lt;/span&gt;, &lt;span class=\\&quot;hljs-attr\\&quot;&gt;&amp;quot;special_tokens_map_file&amp;quot;&lt;/span&gt;: &lt;span class=\\&quot;hljs-literal\\&quot;&gt;null&lt;/span&gt;, &lt;span class=\\&quot;hljs-attr\\&quot;&gt;&amp;quot;name_or_path&amp;quot;&lt;/span&gt;: &lt;span class=\\&quot;hljs-string\\&quot;&gt;&amp;quot;.&amp;quot;&lt;/span&gt;, &lt;span class=\\&quot;hljs-attr\\&quot;&gt;&amp;quot;tokenizer_class&amp;quot;&lt;/span&gt;: &lt;span class=\\&quot;hljs-string\\&quot;&gt;&amp;quot;GPT2Tokenizer&amp;quot;&lt;/span&gt;}&quot;]}" data-target="BlobContent"><div class="relative text-sm"><div class="overflow-x-auto"><table><tr class="" id="L1"><td class="text-right select-none pl-5 pr-3 cursor-pointer text-gray-300 hover:text-black"><pre>1</pre></td>
388
+ <td class="px-3 w-full"><pre>{<span class="hljs-attr">&quot;unk_token&quot;</span>: <span class="hljs-string">&quot;&lt;|endoftext|&gt;&quot;</span>, <span class="hljs-attr">&quot;bos_token&quot;</span>: <span class="hljs-string">&quot;&lt;|endoftext|&gt;&quot;</span>, <span class="hljs-attr">&quot;eos_token&quot;</span>: <span class="hljs-string">&quot;&lt;|endoftext|&gt;&quot;</span>, <span class="hljs-attr">&quot;add_prefix_space&quot;</span>: <span class="hljs-literal">false</span>, <span class="hljs-attr">&quot;special_tokens_map_file&quot;</span>: <span class="hljs-literal">null</span>, <span class="hljs-attr">&quot;name_or_path&quot;</span>: <span class="hljs-string">&quot;.&quot;</span>, <span class="hljs-attr">&quot;tokenizer_class&quot;</span>: <span class="hljs-string">&quot;GPT2Tokenizer&quot;</span>}</pre></td>
389
+ </tr></table></div>
390
+ </div></div></div></div></section></div></main>
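
The mix-up preserved above — an HTML viewer page saved where a JSON file belongs — comes from fetching the page's /blob/ URL instead of its /raw/ URL (both paths appear in the scraped markup). Below is a minimal sketch of the correct fetch, assuming network access and the requests library; the repo id and file path are taken verbatim from the page.

# Minimal sketch: /blob/main/... serves the HTML viewer page (which is what
# ended up in this file), while /raw/main/... serves the file contents.
import json

import requests

REPO = "https://huggingface.co/flax-community/gpt2-medium-indonesian"
RAW_URL = f"{REPO}/raw/main/tokenizer_config.json"    # raw JSON (correct)
BLOB_URL = f"{REPO}/blob/main/tokenizer_config.json"  # HTML page (the mistake)

resp = requests.get(RAW_URL)
resp.raise_for_status()
config = json.loads(resp.text)  # would fail loudly on the HTML page

assert config["tokenizer_class"] == "GPT2Tokenizer"
with open("tokenizer_config.json", "w") as f:
    json.dump(config, f)

Equivalently, huggingface_hub.hf_hub_download(repo_id="flax-community/gpt2-medium-indonesian", filename="tokenizer_config.json") resolves and caches the raw file rather than the viewer page.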
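
For reference, the page metadata embedded in the scrape (pipeline_tag "text-generation", AutoTokenizer/AutoModelForCausalLM, task_specific_params of do_sample=True and max_length=50, and the Indonesian widget prompt) implies standard Transformers usage. A hedged sketch, assuming the repo's PyTorch weights and tokenizer files load as advertised:

# Sketch applying the generation settings declared in the repo's config
# (do_sample=True, max_length=50) to the model-page widget prompt.
from transformers import pipeline

generator = pipeline(
    "text-generation",
    model="flax-community/gpt2-medium-indonesian",
)

# Widget prompt, roughly: "Eight years since we last met; my longing for
# you is beyond measure."
prompt = "Sewindu sudah kita tak berjumpa, rinduku padamu sudah tak terkira."
print(generator(prompt, max_length=50, do_sample=True))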