Add model and scripts
- .gitignore +2 -0
- config.json +40 -0
- run_clm_flax.py +889 -0
- run_gpt.sh +33 -0
- special_tokens_map.json +1 -0
- tokenizer.json +0 -0
- tokenizer_config.json +494 -0
.gitignore
ADDED
@@ -0,0 +1,2 @@
data
*~
config.json
ADDED
@@ -0,0 +1,40 @@
{
  "_name_or_path": ".",
  "activation_function": "gelu_new",
  "architectures": [
    "GPT2LMHeadModel"
  ],
  "attn_pdrop": 0.0,
  "bos_token_id": 50256,
  "embd_pdrop": 0.0,
  "eos_token_id": 50256,
  "initializer_range": 0.02,
  "layer_norm_epsilon": 1e-05,
  "model_type": "gpt2",
  "n_ctx": 1024,
  "n_embd": 1024,
  "n_head": 16,
  "n_inner": null,
  "n_layer": 24,
  "n_positions": 1024,
  "n_special": 0,
  "predict_special_tokens": true,
  "reorder_and_upcast_attn": false,
  "resid_pdrop": 0.0,
  "scale_attn_by_inverse_layer_idx": false,
  "scale_attn_weights": true,
  "summary_activation": null,
  "summary_first_dropout": 0.1,
  "summary_proj_to_labels": true,
  "summary_type": "cls_index",
  "summary_use_proj": true,
  "task_specific_params": {
    "text-generation": {
      "do_sample": true,
      "max_length": 50
    }
  },
  "transformers_version": "4.13.0",
  "use_cache": true,
  "vocab_size": 50257
}
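For reference, this config defines a GPT-2 medium-sized architecture (24 layers, 16 heads, 1024-dim embeddings, 1024-token context). A minimal sketch of instantiating a fresh Flax model from it, assuming the file sits in the current directory (these are the same `AutoConfig`/`FlaxAutoModelForCausalLM` calls that run_clm_flax.py below uses):

    from transformers import AutoConfig, FlaxAutoModelForCausalLM

    # Read the architecture definition committed above and build a
    # randomly initialized Flax GPT-2 model from it (no pretrained weights).
    config = AutoConfig.from_pretrained(".")
    model = FlaxAutoModelForCausalLM.from_config(config, seed=42)
    print(config.n_layer, config.n_embd, config.n_head)  # 24 1024 16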
run_clm_flax.py
ADDED
@@ -0,0 +1,889 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright 2021 The HuggingFace Team All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Pre-training/Fine-tuning the library models for causal language modeling (GPT, GPT-2, CTRL, ...) on a text file or a dataset.

Here is the full list of checkpoints on the hub that can be fine-tuned by this script:
https://huggingface.co/models?filter=text-generation
"""
# You can also adapt this script on your own causal language modeling task. Pointers for this are left as comments.

import json
import logging
import math
import os
import shutil
import sys
import time
from dataclasses import asdict, dataclass, field
from enum import Enum
from itertools import chain
from pathlib import Path
from typing import Callable, Optional

import datasets
import numpy as np
from datasets import Dataset, load_dataset
from tqdm import tqdm

import jax
import jax.numpy as jnp
import optax
import transformers
from flax import jax_utils, traverse_util
from flax.jax_utils import unreplicate
from flax.training import train_state
from flax.training.checkpoints import save_checkpoint, restore_checkpoint
from flax.training.common_utils import get_metrics, onehot, shard, shard_prng_key
from flax.serialization import to_bytes, from_bytes
from transformers import (
    CONFIG_MAPPING,
    FLAX_MODEL_FOR_CAUSAL_LM_MAPPING,
    AutoConfig,
    AutoTokenizer,
    FlaxAutoModelForCausalLM,
    HfArgumentParser,
    is_tensorboard_available,
    set_seed,
)
from transformers.file_utils import get_full_repo_name
from transformers.testing_utils import CaptureLogger


logger = logging.getLogger(__name__)

MODEL_CONFIG_CLASSES = list(FLAX_MODEL_FOR_CAUSAL_LM_MAPPING.keys())
MODEL_TYPES = tuple(conf.model_type for conf in MODEL_CONFIG_CLASSES)


@dataclass
class TrainingArguments:
    output_dir: str = field(
        metadata={"help": "The output directory where the model predictions and checkpoints will be written."},
    )
    overwrite_output_dir: bool = field(
        default=False,
        metadata={
            "help": (
                "Overwrite the content of the output directory. "
                "Use this to continue training if output_dir points to a checkpoint directory."
            )
        },
    )
    do_train: bool = field(default=False, metadata={"help": "Whether to run training."})
    do_eval: bool = field(default=False, metadata={"help": "Whether to run eval on the dev set."})
    per_device_train_batch_size: int = field(
        default=8, metadata={"help": "Batch size per GPU/TPU core/CPU for training."}
    )
    per_device_eval_batch_size: int = field(
        default=8, metadata={"help": "Batch size per GPU/TPU core/CPU for evaluation."}
    )
    learning_rate: float = field(default=5e-5, metadata={"help": "The initial learning rate for AdamW."})
    weight_decay: float = field(default=0.0, metadata={"help": "Weight decay for AdamW if we apply some."})
    adam_beta1: float = field(default=0.9, metadata={"help": "Beta1 for AdamW optimizer"})
    adam_beta2: float = field(default=0.999, metadata={"help": "Beta2 for AdamW optimizer"})
    adam_epsilon: float = field(default=1e-8, metadata={"help": "Epsilon for AdamW optimizer."})
    adafactor: bool = field(default=False, metadata={"help": "Whether or not to replace AdamW by Adafactor."})
    num_train_epochs: float = field(default=3.0, metadata={"help": "Total number of training epochs to perform."})
    warmup_steps: int = field(default=0, metadata={"help": "Linear warmup over warmup_steps."})
    logging_steps: int = field(default=500, metadata={"help": "Log every X updates steps."})
    save_steps: int = field(default=500, metadata={"help": "Save checkpoint every X updates steps."})
    eval_steps: Optional[int] = field(default=None, metadata={"help": "Run an evaluation every X steps."})
    seed: int = field(default=42, metadata={"help": "Random seed that will be set at the beginning of training."})
    push_to_hub: bool = field(
        default=False, metadata={"help": "Whether or not to upload the trained model to the model hub after training."}
    )
    hub_model_id: Optional[str] = field(
        default=None, metadata={"help": "The name of the repository to keep in sync with the local `output_dir`."}
    )
    hub_token: Optional[str] = field(default=None, metadata={"help": "The token to use to push to the Model Hub."})

    def __post_init__(self):
        if self.output_dir is not None:
            self.output_dir = os.path.expanduser(self.output_dir)

    def to_dict(self):
        """
        Serializes this instance while replacing `Enum` members by their values (for JSON serialization support). It
        obfuscates the token values by removing their value.
        """
        d = asdict(self)
        for k, v in d.items():
            if isinstance(v, Enum):
                d[k] = v.value
            if isinstance(v, list) and len(v) > 0 and isinstance(v[0], Enum):
                d[k] = [x.value for x in v]
            if k.endswith("_token"):
                d[k] = f"<{k.upper()}>"
        return d


@dataclass
class ModelArguments:
    """
    Arguments pertaining to which model/config/tokenizer we are going to fine-tune, or train from scratch.
    """

    model_name_or_path: Optional[str] = field(
        default=None,
        metadata={
            "help": "The model checkpoint for weights initialization. "
            "Don't set if you want to train a model from scratch."
        },
    )
    model_type: Optional[str] = field(
        default=None,
        metadata={"help": "If training from scratch, pass a model type from the list: " + ", ".join(MODEL_TYPES)},
    )
    config_name: Optional[str] = field(
        default=None, metadata={"help": "Pretrained config name or path if not the same as model_name"}
    )
    tokenizer_name: Optional[str] = field(
        default=None, metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"}
    )
    cache_dir: Optional[str] = field(
        default=None, metadata={"help": "Where do you want to store the pretrained models downloaded from s3"}
    )
    use_fast_tokenizer: bool = field(
        default=True,
        metadata={"help": "Whether to use one of the fast tokenizers (backed by the tokenizers library) or not."},
    )
    dtype: Optional[str] = field(
        default="float32",
        metadata={
            "help": "Floating-point format in which the model weights should be initialized and trained. Choose one of `[float32, float16, bfloat16]`."
        },
    )


@dataclass
class DataTrainingArguments:
    """
    Arguments pertaining to what data we are going to input our model for training and eval.
    """

    dataset_name: Optional[str] = field(
        default=None, metadata={"help": "The name of the dataset to use (via the datasets library)."}
    )
    dataset_config_name: Optional[str] = field(
        default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."}
    )
    train_file: Optional[str] = field(default=None, metadata={"help": "The input training data file (a text file)."})
    validation_file: Optional[str] = field(
        default=None,
        metadata={"help": "An optional input evaluation data file to evaluate the perplexity on (a text file)."},
    )
    max_train_samples: Optional[int] = field(
        default=None,
        metadata={
            "help": "For debugging purposes or quicker training, truncate the number of training examples to this "
            "value if set."
        },
    )
    max_eval_samples: Optional[int] = field(
        default=None,
        metadata={
            "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this "
            "value if set."
        },
    )
    overwrite_cache: bool = field(
        default=False, metadata={"help": "Overwrite the cached training and evaluation sets"}
    )
    validation_split_percentage: Optional[int] = field(
        default=5,
        metadata={
            "help": "The percentage of the train set used as validation set in case there's no validation split"
        },
    )
    block_size: Optional[int] = field(
        default=None,
        metadata={
            "help": "Optional input sequence length after tokenization. "
            "The training dataset will be truncated in blocks of this size for training. "
            "Default to the model max input length for single sentence inputs (take into account special tokens)."
        },
    )
    preprocessing_num_workers: Optional[int] = field(
        default=None,
        metadata={"help": "The number of processes to use for the preprocessing."},
    )
    keep_linebreaks: bool = field(
        default=True, metadata={"help": "Whether to keep line breaks when using TXT files or not."}
    )

    def __post_init__(self):
        if self.dataset_name is None and self.train_file is None and self.validation_file is None:
            raise ValueError("Need either a dataset name or a training/validation file.")
        else:
            if self.train_file is not None:
                extension = self.train_file.split(".")[-1]
                assert extension in ["csv", "json", "txt"], "`train_file` should be a csv, a json or a txt file."
            if self.validation_file is not None:
                extension = self.validation_file.split(".")[-1]
                assert extension in ["csv", "json", "txt"], "`validation_file` should be a csv, a json or a txt file."


class TrainState(train_state.TrainState):
    dropout_rng: jnp.ndarray

    def replicate(self):
        return jax_utils.replicate(self).replace(dropout_rng=shard_prng_key(self.dropout_rng))


def data_loader(rng: jax.random.PRNGKey, dataset: Dataset, batch_size: int, shuffle: bool = False):
    """
    Returns batches of size `batch_size` from truncated `dataset`, sharded over all local devices.
    Shuffles batches if `shuffle` is `True`.
    """
    steps_per_epoch = len(dataset) // batch_size

    if shuffle:
        batch_idx = jax.random.permutation(rng, len(dataset))
    else:
        batch_idx = jnp.arange(len(dataset))

    batch_idx = batch_idx[: steps_per_epoch * batch_size]  # Skip incomplete batch.
    batch_idx = batch_idx.reshape((steps_per_epoch, batch_size))

    for idx in batch_idx:
        batch = dataset[idx]
        batch = {k: np.array(v) for k, v in batch.items()}

        yield batch


def write_train_metric(summary_writer, train_metrics, train_time, step):
    summary_writer.scalar("train_time", train_time, step)

    train_metrics = get_metrics(train_metrics)
    for key, vals in train_metrics.items():
        tag = f"train_{key}"
        for i, val in enumerate(vals):
            summary_writer.scalar(tag, val, step - len(vals) + i + 1)


def write_eval_metric(summary_writer, eval_metrics, step):
    for metric_name, value in eval_metrics.items():
        summary_writer.scalar(f"eval_{metric_name}", value, step)


def create_learning_rate_fn(
    train_ds_size: int, train_batch_size: int, num_train_epochs: int, num_warmup_steps: int, learning_rate: float
) -> Callable[[int], jnp.ndarray]:
    """Returns a linear warmup, linear decay learning rate function."""
    steps_per_epoch = train_ds_size // train_batch_size
    num_train_steps = steps_per_epoch * num_train_epochs
    warmup_fn = optax.linear_schedule(init_value=0.0, end_value=learning_rate, transition_steps=num_warmup_steps)
    decay_fn = optax.linear_schedule(
        init_value=learning_rate, end_value=0, transition_steps=num_train_steps - num_warmup_steps
    )
    schedule_fn = optax.join_schedules(schedules=[warmup_fn, decay_fn], boundaries=[num_warmup_steps])
    return schedule_fn


# utils
def mb_item(x):
    return x.item() if hasattr(x, "item") else x


# checkpoint functions
def save_model_checkpoint(model, save_dir, state, with_opt: bool = True, push_to_hub: bool = False):
    """
    If `push_to_hub` is True, will save to `save_dir`. Otherwise will save to `save_dir/ckpt-{step}`.
    """
    state = jax_utils.unreplicate(state)
    logger.info(f"SAVING CHECKPOINT IN {save_dir}...")
    if not push_to_hub:
        save_dir = f"{save_dir}/ckpt-{mb_item(state.step) - 1}"
    model.save_pretrained(
        save_dir,
        params=state.params,
        push_to_hub=push_to_hub,
        commit_message=f"Saving weights and logs at step {mb_item(state.step) - 1}",
    )
    if with_opt:
        with open(os.path.join(save_dir, "opt_state.msgpack"), "wb") as f:
            f.write(to_bytes(state.opt_state))
        with open(os.path.join(save_dir, "training_state.json"), "w") as f:
            json.dump({"step": state.step.item()}, f)
    logger.info("checkpoint saved")


# This is added to make resuming from checkpoint work with Adafactor;
# to be removed when the underlying issue is fixed.
# Notice that the Adafactor state is perturbed by `fake_update`.
def _zeros_tree_like(inp_tree):
    return jax.tree_map(jnp.zeros_like, inp_tree)


def fake_update(state):
    fake_updates = _zeros_tree_like(state.params)
    _, new_inner_opt_state = state.tx.inner_opt.update(fake_updates, state.opt_state.inner_opt_state, state.params)
    opt_state = state.opt_state
    new_opt_state = optax.MultiStepsState(
        mini_step=opt_state.mini_step,
        gradient_step=opt_state.gradient_step,
        inner_opt_state=new_inner_opt_state,
        acc_grads=opt_state.acc_grads,
    )
    return state.replace(opt_state=new_opt_state)


def reinstantiate_states(opt_state):
    new_state = []
    for state in opt_state:
        if isinstance(state, list):
            new_state.append(reinstantiate_states(state))
        else:
            cls = getattr(optax, type(state).__name__)
            new_state.append(cls(**{k: getattr(state, k) for k in state._fields}))
    return new_state


def restore_model_checkpoint(save_dir, state):
    logger.info(f"RESTORING CHECKPOINT FROM {save_dir}...")
    with open(os.path.join(save_dir, "flax_model.msgpack"), "rb") as f:
        params = from_bytes(state.params, f.read())

    with open(os.path.join(save_dir, "opt_state.msgpack"), "rb") as f:
        opt_state = from_bytes(state.opt_state, f.read())

    with open(os.path.join(save_dir, "training_state.json"), "r") as f:
        training_state = json.load(f)
    step = training_state["step"]

    logger.info("checkpoint restored")
    # Reinstantiate the inner opt state to avoid a type conflict.
    if hasattr(opt_state, "inner_opt_state"):
        print("restoring state of multisteps optimizer")
        inner_opt_state = reinstantiate_states(opt_state.inner_opt_state)
        ms_state_dict = {k: getattr(state.opt_state, k) for k in state.opt_state._fields}
        ms_state_dict["inner_opt_state"] = inner_opt_state
        opt_state = optax.MultiStepsState(**ms_state_dict)

    return state.replace(step=step, params=params, opt_state=opt_state)


def rotate_checkpoints(ckpt_dir: str, save_total_limit: int):
    """Removes older checkpoints so that `save_total_limit` checkpoints are kept."""
    # TODO: what to remove is decided using step number only, we might want to improve that
    ckpts = [str(x) for x in Path(ckpt_dir).glob("ckpt-*")]
    # sort checkpoints by step
    ckpts_sorted = sorted(ckpts, key=lambda x: int(x.split("-")[-1]))
    ckpts_to_delete = ckpts_sorted[:-save_total_limit]
    for ckpt in ckpts_to_delete:
        logger.info(f"Deleting older checkpoint [{ckpt}] due to save_total_limit ({save_total_limit})")
        shutil.rmtree(ckpt)


def main():
    # See all possible arguments in src/transformers/training_args.py
    # or by passing the --help flag to this script.
    # We now keep distinct sets of args, for a cleaner separation of concerns.

    parser = HfArgumentParser((ModelArguments, DataTrainingArguments, TrainingArguments))
    if len(sys.argv) == 2 and sys.argv[1].endswith(".json"):
        # If we pass only one argument to the script and it's the path to a json file,
        # let's parse it to get our arguments.
        model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1]))
    else:
        model_args, data_args, training_args = parser.parse_args_into_dataclasses()

    if (
        os.path.exists(training_args.output_dir)
        and os.listdir(training_args.output_dir)
        and training_args.do_train
        and not training_args.overwrite_output_dir
    ):
        raise ValueError(
            f"Output directory ({training_args.output_dir}) already exists and is not empty. "
            "Use --overwrite_output_dir to overcome."
        )

    # Make one log on every process with the configuration for debugging.
    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
        datefmt="%m/%d/%Y %H:%M:%S",
        level=logging.INFO,
    )
    # Setup logging, we only want one process per machine to log things on the screen.
    logger.setLevel(logging.INFO if jax.process_index() == 0 else logging.ERROR)
    if jax.process_index() == 0:
        datasets.utils.logging.set_verbosity_warning()
        transformers.utils.logging.set_verbosity_info()
    else:
        datasets.utils.logging.set_verbosity_error()
        transformers.utils.logging.set_verbosity_error()

    # Set the verbosity to info of the Transformers logger (on main process only):
    logger.info(f"Training/evaluation parameters {training_args}")

    # Set seed before initializing model.
    set_seed(training_args.seed)

    # # Handle the repository creation
    # if training_args.push_to_hub:
    #     if training_args.hub_model_id is None:
    #         repo_name = get_full_repo_name(
    #             Path(training_args.output_dir).absolute().name, token=training_args.hub_token
    #         )
    #     else:
    #         repo_name = training_args.hub_model_id
    #     repo = Repository(training_args.output_dir, clone_from=repo_name)

    # Get the datasets: you can either provide your own CSV/JSON/TXT training and evaluation files (see below)
    # or just provide the name of one of the public datasets available on the hub at https://huggingface.co/datasets/
    # (the dataset will be downloaded automatically from the datasets Hub).
    #
    # For CSV/JSON files, this script will use the column called 'text' or the first column if no column called
    # 'text' is found. You can easily tweak this behavior (see below).
    #
    # In distributed training, the load_dataset function guarantees that only one local process can concurrently
    # download the dataset.
    if data_args.dataset_name is not None:
        # Downloading and loading a dataset from the hub.
        dataset = load_dataset(
            data_args.dataset_name, data_args.dataset_config_name, cache_dir=model_args.cache_dir, keep_in_memory=False
        )

        if "validation" not in dataset.keys():
            dataset["validation"] = load_dataset(
                data_args.dataset_name,
                data_args.dataset_config_name,
                split=f"train[:{data_args.validation_split_percentage}%]",
                cache_dir=model_args.cache_dir,
            )
            dataset["train"] = load_dataset(
                data_args.dataset_name,
                data_args.dataset_config_name,
                split=f"train[{data_args.validation_split_percentage}%:]",
                cache_dir=model_args.cache_dir,
            )
    else:
        data_files = {}
        dataset_args = {}
        if data_args.train_file is not None:
            data_files["train"] = data_args.train_file
        if data_args.validation_file is not None:
            data_files["validation"] = data_args.validation_file
        extension = data_args.train_file.split(".")[-1]
        if extension == "txt":
            extension = "text"
            dataset_args["keep_linebreaks"] = data_args.keep_linebreaks
        dataset = load_dataset(extension, data_files=data_files, cache_dir=model_args.cache_dir, **dataset_args)

        if "validation" not in dataset.keys():
            dataset["validation"] = load_dataset(
                extension,
                data_files=data_files,
                split=f"train[:{data_args.validation_split_percentage}%]",
                cache_dir=model_args.cache_dir,
                **dataset_args,
            )
            dataset["train"] = load_dataset(
                extension,
                data_files=data_files,
                split=f"train[{data_args.validation_split_percentage}%:]",
                cache_dir=model_args.cache_dir,
                **dataset_args,
            )
    # See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at
    # https://huggingface.co/docs/datasets/loading_datasets.html.

    # Load pretrained model and tokenizer

    # Distributed training:
    # The .from_pretrained methods guarantee that only one local process can concurrently
    # download model & vocab.
    if model_args.config_name:
        config = AutoConfig.from_pretrained(model_args.config_name, cache_dir=model_args.cache_dir)
    elif model_args.model_name_or_path:
        config = AutoConfig.from_pretrained(model_args.model_name_or_path, cache_dir=model_args.cache_dir)
    else:
        config = CONFIG_MAPPING[model_args.model_type]()
        logger.warning("You are instantiating a new config instance from scratch.")

    if model_args.tokenizer_name:
        tokenizer = AutoTokenizer.from_pretrained(
            model_args.tokenizer_name, cache_dir=model_args.cache_dir, use_fast=model_args.use_fast_tokenizer
        )
    elif model_args.model_name_or_path:
        tokenizer = AutoTokenizer.from_pretrained(
            model_args.model_name_or_path, cache_dir=model_args.cache_dir, use_fast=model_args.use_fast_tokenizer
        )
    else:
        raise ValueError(
            "You are instantiating a new tokenizer from scratch. This is not supported by this script. "
            "You can do it from another script, save it, and load it from here, using --tokenizer_name."
        )

    if model_args.model_name_or_path:
        model = FlaxAutoModelForCausalLM.from_pretrained(
            model_args.model_name_or_path, config=config, seed=training_args.seed, dtype=getattr(jnp, model_args.dtype)
        )
    else:
        model = FlaxAutoModelForCausalLM.from_config(
            config, seed=training_args.seed, dtype=getattr(jnp, model_args.dtype)
        )

    # Preprocessing the datasets.
    # First we tokenize all the texts.
    if training_args.do_train:
        column_names = dataset["train"].column_names
    else:
        column_names = dataset["validation"].column_names
    text_column_name = "text" if "text" in column_names else column_names[0]

    # since this will be pickled to avoid _LazyModule error in Hasher force logger loading before tokenize_function
    tok_logger = transformers.utils.logging.get_logger("transformers.tokenization_utils_base")

    def tokenize_function(examples):
        with CaptureLogger(tok_logger) as cl:
            output = tokenizer(examples[text_column_name])
        # clm input could be much much longer than block_size
        if "Token indices sequence length is longer than the" in cl.out:
            tok_logger.warning(
                "^^^^^^^^^^^^^^^^ Please ignore the warning above - this long input will be chunked into smaller bits before being passed to the model."
            )
        return output

    tokenized_datasets = dataset.map(
        tokenize_function,
        batched=True,
        num_proc=data_args.preprocessing_num_workers,
        remove_columns=column_names,
        load_from_cache_file=not data_args.overwrite_cache,
    )

    if data_args.block_size is None:
        block_size = tokenizer.model_max_length
        if block_size > config.max_position_embeddings:
            logger.warning(
                f"The tokenizer picked seems to have a very large `model_max_length` ({tokenizer.model_max_length}). "
                "Picking 1024 instead. You can change that default value by passing --block_size xxx."
            )
            block_size = 1024
    else:
        if data_args.block_size > tokenizer.model_max_length:
            logger.warning(
                f"The block_size passed ({data_args.block_size}) is larger than the maximum length for the model "
                f"({tokenizer.model_max_length}). Using block_size={tokenizer.model_max_length}."
            )
        block_size = min(data_args.block_size, tokenizer.model_max_length)

    # Main data processing function that will concatenate all texts from our dataset and generate chunks of block_size.
    def group_texts(examples):
        # Concatenate all texts.
        concatenated_examples = {k: list(chain(*examples[k])) for k in examples.keys()}
        total_length = len(concatenated_examples[list(examples.keys())[0]])
        # We drop the small remainder; we could add padding if the model supported it instead of this drop. You can
        # customize this part to your needs.
        if total_length >= block_size:
            total_length = (total_length // block_size) * block_size
        # Split by chunks of max_len.
        result = {
            k: [t[i : i + block_size] for i in range(0, total_length, block_size)]
            for k, t in concatenated_examples.items()
        }
        result["labels"] = result["input_ids"].copy()
        return result

    # Note that with `batched=True`, this map processes 1,000 texts together, so group_texts throws away a remainder
    # for each of those groups of 1,000 texts. You can adjust that batch_size here but a higher value might be slower
    # to preprocess.
    #
    # To speed up this part, we use multiprocessing. See the documentation of the map method for more information:
    # https://huggingface.co/docs/datasets/package_reference/main_classes.html#datasets.Dataset.map

    lm_datasets = tokenized_datasets.map(
        group_texts,
        batched=True,
        num_proc=data_args.preprocessing_num_workers,
        load_from_cache_file=not data_args.overwrite_cache,
    )

    if training_args.do_train:
        if "train" not in tokenized_datasets:
            raise ValueError("--do_train requires a train dataset")
        train_dataset = lm_datasets["train"]
        if data_args.max_train_samples is not None:
            train_dataset = train_dataset.select(range(data_args.max_train_samples))

    if training_args.do_eval:
        if "validation" not in tokenized_datasets:
            raise ValueError("--do_eval requires a validation dataset")
        eval_dataset = lm_datasets["validation"]
        if data_args.max_eval_samples is not None:
            eval_dataset = eval_dataset.select(range(data_args.max_eval_samples))

    # Enable tensorboard only on the master node
    has_tensorboard = is_tensorboard_available()
    if has_tensorboard and jax.process_index() == 0:
        try:
            from flax.metrics.tensorboard import SummaryWriter

            summary_writer = SummaryWriter(log_dir=Path(training_args.output_dir + "/runs"))
        except ImportError as ie:
            has_tensorboard = False
            logger.warning(
                f"Unable to display metrics through TensorBoard because some packages are not installed: {ie}"
            )
    else:
        logger.warning(
            "Unable to display metrics through TensorBoard because the package is not installed: "
            "Please run pip install tensorboard to enable."
        )

    # Initialize our training
    rng = jax.random.PRNGKey(training_args.seed)
    rng, dropout_rng = jax.random.split(rng)

    # Store some constants
    num_epochs = int(training_args.num_train_epochs)
    train_batch_size = int(training_args.per_device_train_batch_size) * jax.device_count()
    eval_batch_size = int(training_args.per_device_eval_batch_size) * jax.device_count()
    steps_per_epoch = len(train_dataset) // train_batch_size
    total_train_steps = steps_per_epoch * num_epochs

    # Create learning rate schedule
    linear_decay_lr_schedule_fn = create_learning_rate_fn(
        len(train_dataset),
        train_batch_size,
        training_args.num_train_epochs,
        training_args.warmup_steps,
        training_args.learning_rate,
    )

    # We use Optax's "masking" functionality to not apply weight decay
    # to bias and LayerNorm scale parameters. decay_mask_fn returns a
    # mask boolean with the same structure as the parameters.
    # The mask is True for parameters that should be decayed.
    # Note that this mask is specifically adapted for FlaxGPT2.
    # For other models, one should correct the layer norm parameter naming
    # accordingly.
    def decay_mask_fn(params):
        flat_params = traverse_util.flatten_dict(params)
        flat_mask = {
            path: (path[-1] != "bias" and path[-2:] not in [("ln_1", "scale"), ("ln_2", "scale"), ("ln_f", "scale")])
            for path in flat_params
        }
        return traverse_util.unflatten_dict(flat_mask)

    # create adam optimizer
    if training_args.adafactor:
        # We use the default parameters here to initialize adafactor,
        # for more details about the parameters please check https://github.com/deepmind/optax/blob/ed02befef9bf81cbbf236be3d2b0e032e9ed4a40/optax/_src/alias.py#L74
        optimizer = optax.adafactor(
            learning_rate=linear_decay_lr_schedule_fn,
        )
    else:
        optimizer = optax.adamw(
            learning_rate=linear_decay_lr_schedule_fn,
            b1=training_args.adam_beta1,
            b2=training_args.adam_beta2,
            eps=training_args.adam_epsilon,
            weight_decay=training_args.weight_decay,
            mask=decay_mask_fn,
        )

    # Setup train state
    state = TrainState.create(apply_fn=model.__call__, params=model.params, tx=optimizer, dropout_rng=dropout_rng)

    # if training_args.resume_from_checkpoint:
    #     state = restore_model_checkpoint(training_args.resume_from_checkpoint, state)
    #     resume_step = mb_item(state.step)
    #     if training_args.adafactor:
    #         state = fake_update(state)
    # else:
    resume_step = 0

    def loss_fn(logits, labels):
        shift_logits = logits[..., :-1, :]
        shift_labels = labels[..., 1:]
        loss = optax.softmax_cross_entropy(shift_logits, onehot(shift_labels, shift_logits.shape[-1]))
        return loss.mean()

    # Define gradient update step fn
    def train_step(state, batch):
        dropout_rng, new_dropout_rng = jax.random.split(state.dropout_rng)

        def compute_loss(params):
            labels = batch.pop("labels")
            logits = state.apply_fn(**batch, params=params, dropout_rng=dropout_rng, train=True)[0]
            loss = loss_fn(logits, labels)
            return loss

        grad_fn = jax.value_and_grad(compute_loss)
        loss, grad = grad_fn(state.params)
        grad = jax.lax.pmean(grad, "batch")

        new_state = state.apply_gradients(grads=grad, dropout_rng=new_dropout_rng)

        metrics = {"loss": loss, "learning_rate": linear_decay_lr_schedule_fn(state.step)}
        metrics = jax.lax.pmean(metrics, axis_name="batch")

        return new_state, metrics

    # Define eval fn
    def eval_step(params, batch):
        labels = batch.pop("labels")
        logits = model(**batch, params=params, train=False)[0]
        loss = loss_fn(logits, labels)

        # summarize metrics
        metrics = {"loss": loss}
        metrics = jax.lax.pmean(metrics, axis_name="batch")
        return metrics

    # Create parallel version of the train and eval step
    p_train_step = jax.pmap(train_step, "batch", donate_argnums=(0,))
    p_eval_step = jax.pmap(eval_step, "batch")

    # Replicate the train state on each device
    state = state.replicate()

    logger.info("***** Running training *****")
    logger.info(f"  Num examples = {len(train_dataset)}")
    logger.info(f"  Num Epochs = {num_epochs}")
    logger.info(f"  Num tokenized group examples {len(tokenized_datasets['train'])}")
    logger.info(f"  Instantaneous batch size per device = {training_args.per_device_train_batch_size}")
    logger.info(f"  Total train batch size (w. parallel & distributed) = {train_batch_size}")
    logger.info(f"  Total optimization steps = {total_train_steps}")

    train_time = 0
    train_metrics = []
    resume_epoch = resume_step // steps_per_epoch
    epochs = tqdm(range(num_epochs), desc=f"Epoch ... ({resume_epoch + 1}/{num_epochs})", position=0)
    if resume_step != 0:
        logger.info(f"Skipping to epoch {resume_epoch} step {resume_step}")
    for epoch in epochs:
        # ======================== Training ================================
        if epoch < resume_epoch:
            continue

        train_start = time.time()

        # Create sampling rng
        rng, input_rng = jax.random.split(rng)

        # Generate an epoch by shuffling sampling indices from the train dataset
        train_loader = data_loader(input_rng, train_dataset, train_batch_size, shuffle=True)
        steps_per_epoch = len(train_dataset) // train_batch_size
        # train
        for step in tqdm(range(steps_per_epoch), desc="Training...", position=1, leave=False):
            cur_step = epoch * (len(train_dataset) // train_batch_size) + step
            # skip to the step from which we are resuming
            if cur_step < resume_step:
                continue

            batch = next(train_loader)
            batch = shard(batch)
            state, train_metric = p_train_step(state, batch)
            train_metrics.append(train_metric)

            if cur_step % training_args.logging_steps == 0 and cur_step > 0:
                # Save metrics
                train_metric = unreplicate(train_metric)
                train_time += time.time() - train_start
                if has_tensorboard and jax.process_index() == 0:
                    write_train_metric(summary_writer, train_metrics, train_time, cur_step)

                epochs.write(
                    f"Step... ({cur_step} | Loss: {train_metric['loss'].mean()}, Learning Rate: {train_metric['learning_rate'].mean()})"
                )

                train_metrics = []

            if cur_step % training_args.eval_steps == 0 and cur_step > 0:
                # ======================== Evaluating ==============================
                eval_metrics = []
                eval_loader = data_loader(input_rng, eval_dataset, eval_batch_size)
                eval_steps = len(eval_dataset) // eval_batch_size
                for _ in tqdm(range(eval_steps), desc="Evaluating...", position=2, leave=False):
                    # Model forward
                    batch = next(eval_loader)
                    batch = shard(batch)
                    metrics = p_eval_step(state.params, batch)
                    eval_metrics.append(metrics)

                # normalize eval metrics
                eval_metrics = get_metrics(eval_metrics)
                eval_metrics = jax.tree_map(jnp.mean, eval_metrics)

                try:
                    eval_metrics["perplexity"] = math.exp(eval_metrics["loss"])
                except OverflowError:
                    eval_metrics["perplexity"] = float("inf")

                # Print metrics and update progress bar
                desc = f"Step... ({cur_step} | Eval Loss: {eval_metrics['loss']} | Eval Perplexity: {eval_metrics['perplexity']})"
                epochs.write(desc)
                epochs.desc = desc

                # Save metrics
                if has_tensorboard and jax.process_index() == 0:
                    write_eval_metric(summary_writer, eval_metrics, cur_step)

            if cur_step % training_args.save_steps == 0 and cur_step > 0:
                # save checkpoint after each epoch and push checkpoint to the hub
                if jax.process_index() == 0:
                    save_model_checkpoint(model, training_args.output_dir, state, with_opt=False,
                                          push_to_hub=training_args.push_to_hub)
                    # params = jax.device_get(unreplicate(state.params))
                    # model.save_pretrained(training_args.output_dir, params=params)
                    # tokenizer.save_pretrained(training_args.output_dir)
                    # if training_args.push_to_hub:
                    #     repo.push_to_hub(commit_message=f"Saving weights and logs of step {cur_step}", blocking=False)

    # Eval after training
    if training_args.do_eval:
        eval_metrics = []
        eval_loader = data_loader(input_rng, eval_dataset, eval_batch_size)
        eval_steps = len(eval_dataset) // eval_batch_size
        for _ in tqdm(range(eval_steps), desc="Evaluating...", position=2, leave=False):
            # Model forward
            batch = shard(next(eval_loader))
            metrics = p_eval_step(state.params, batch)
            eval_metrics.append(metrics)

        # normalize eval metrics
        eval_metrics = get_metrics(eval_metrics)
        eval_metrics = jax.tree_map(lambda x: jnp.mean(x).item(), eval_metrics)

        try:
            eval_metrics["perplexity"] = math.exp(eval_metrics["loss"])
        except OverflowError:
            eval_metrics["perplexity"] = float("inf")

        if jax.process_index() == 0:
            eval_metrics = {f"eval_{metric_name}": value for metric_name, value in eval_metrics.items()}
            path = os.path.join(training_args.output_dir, "eval_results.json")
            with open(path, "w") as f:
                json.dump(eval_metrics, f, indent=4, sort_keys=True)

    # save model after training is over
    if jax.process_index() == 0:
        save_model_checkpoint(model, training_args.output_dir, state, with_opt=False,
                              push_to_hub=training_args.push_to_hub)


if __name__ == "__main__":
    main()
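The `create_learning_rate_fn` above joins two `optax.linear_schedule`s at the warmup boundary. A small sketch of the resulting shape, with illustrative step counts (5,000 warmup steps out of 100,000 total and a peak of 8e-4, matching run_gpt.sh below; the totals are assumptions, the real values depend on dataset size):

    import optax

    # Linear warmup to the peak rate, then linear decay to zero;
    # the same construction as create_learning_rate_fn in run_clm_flax.py.
    warmup = optax.linear_schedule(init_value=0.0, end_value=8e-4, transition_steps=5_000)
    decay = optax.linear_schedule(init_value=8e-4, end_value=0.0, transition_steps=95_000)
    schedule = optax.join_schedules(schedules=[warmup, decay], boundaries=[5_000])

    print(schedule(0))        # 0.0
    print(schedule(5_000))    # 0.0008 (peak, end of warmup)
    print(schedule(100_000))  # 0.0 (fully decayed)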
run_gpt.sh
ADDED
@@ -0,0 +1,33 @@
#!/bin/bash

export HF_PROJECT="gpt2-medium-dutch"

# Variables for training the tokenizer and creating the config
export VOCAB_SIZE="50257"
export DATASET="yhavinga/mc4_nl_cleaned"  # Name of the dataset in the Huggingface Hub
export DATASET_CONFIG="full"              # Config of the dataset in the Huggingface Hub
export DATASET_SPLIT="train"              # Split to use for training tokenizer and model
export TEXT_FIELD="text"                  # Field containing the text to be used for training
export CONFIG_TYPE="gpt2-medium"          # Config that our model will use
export MODEL_PATH="${HOME}/data/${HF_PROJECT}"  # Path to the model, e.g. here inside the mount

python run_clm_flax.py \
    --output_dir="${MODEL_PATH}" \
    --model_type="gpt2" \
    --config_name="${MODEL_PATH}" \
    --tokenizer_name="${MODEL_PATH}" \
    --preprocessing_num_workers="96" \
    --do_train --do_eval \
    --dataset_name="${DATASET}" \
    --dataset_config_name="${DATASET_CONFIG}" \
    --block_size="512" \
    --per_device_train_batch_size="16" \
    --per_device_eval_batch_size="16" \
    --learning_rate="8e-4" --warmup_steps="5000" \
    --adam_beta1="0.9" --adam_beta2="0.98" --weight_decay="0.01" \
    --overwrite_output_dir \
    --num_train_epochs="4" \
    --logging_steps="500" \
    --save_steps="40000" \
    --eval_steps="2500" \
    --push_to_hub
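Once training finishes and the model is saved to MODEL_PATH, generation can be sanity-checked with a short snippet. This is a hypothetical sketch (the Dutch prompt is illustrative; do_sample=True and max_length=50 mirror the task_specific_params in config.json):

    import os
    from transformers import AutoTokenizer, FlaxAutoModelForCausalLM

    # MODEL_PATH from run_gpt.sh; expand ~ explicitly before loading.
    model_dir = os.path.expanduser("~/data/gpt2-medium-dutch")
    tokenizer = AutoTokenizer.from_pretrained(model_dir)
    model = FlaxAutoModelForCausalLM.from_pretrained(model_dir)

    inputs = tokenizer("Het weer is vandaag", return_tensors="np")
    outputs = model.generate(inputs["input_ids"], do_sample=True, max_length=50)
    print(tokenizer.decode(outputs.sequences[0], skip_special_tokens=True))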
special_tokens_map.json
ADDED
@@ -0,0 +1 @@
{"bos_token": "<|endoftext|>", "eos_token": "<|endoftext|>", "unk_token": "<|endoftext|>"}
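As with stock GPT-2, the `<|endoftext|>` marker does triple duty as beginning-of-sequence, end-of-sequence, and unknown token. A quick check, assuming the files above sit in the working directory:

    from transformers import AutoTokenizer

    # The committed map routes bos/eos/unk to the same GPT-2 marker.
    tok = AutoTokenizer.from_pretrained(".")
    assert tok.bos_token == tok.eos_token == tok.unk_token == "<|endoftext|>"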
tokenizer.json
ADDED
The diff for this file is too large to render.
tokenizer_config.json
ADDED
@@ -0,0 +1,494 @@
|
85 |
+
Languages</a></li>
|
86 |
+
<li><a class="flex items-center hover:bg-gray-50 dark:hover:bg-gray-800 cursor-pointer px-3 py-1.5 whitespace-nowrap
|
87 |
+
|
88 |
+
hover:underline
|
89 |
+
v2-dropdown-entry" href="/organizations">
|
90 |
+
<svg class="mr-1.5 text-gray-400" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 24 24"><path class="uim-tertiary" d="M12 18a3.5 3.5 0 1 1 3.5-3.5A3.504 3.504 0 0 1 12 18z" opacity=".5" fill="currentColor"></path><path class="uim-primary" d="M14.64 16.772a3.452 3.452 0 0 1-5.28 0A4.988 4.988 0 0 0 7 21a1 1 0 0 0 1 1h8a1 1 0 0 0 1-1a4.988 4.988 0 0 0-2.36-4.228z" fill="currentColor"></path><path class="uim-tertiary" d="M21 12a.996.996 0 0 1-.664-.252L12 4.338l-8.336 7.41a1 1 0 0 1-1.328-1.496l9-8a.999.999 0 0 1 1.328 0l9 8A1 1 0 0 1 21 12z" opacity=".5" fill="currentColor"></path><path class="uim-quaternary" d="M12 4.338l-8 7.111V21a1 1 0 0 0 1 1h3a1 1 0 0 1-1-1a4.988 4.988 0 0 1 2.36-4.228A3.469 3.469 0 0 1 8.5 14.5a3.5 3.5 0 0 1 7 0a3.469 3.469 0 0 1-.86 2.272A4.988 4.988 0 0 1 17 21a1 1 0 0 1-1 1h3a1 1 0 0 0 1-1v-9.551z" opacity=".25" fill="currentColor"></path></svg>
|
91 |
+
Organizations</a></li></ul></li>
|
92 |
+
<li><div class="col-span-full px-4 py-0.5 flex items-center justify-between font-semibold bg-gradient-to-r to-white dark:to-gray-925 whitespace-nowrap text-yellow-800 from-yellow-50 dark:text-yellow-100 dark:from-yellow-900">Community
|
93 |
+
</div>
|
94 |
+
<ul><li><a class="flex items-center hover:bg-gray-50 dark:hover:bg-gray-800 cursor-pointer px-3 py-1.5 whitespace-nowrap
|
95 |
+
|
96 |
+
hover:underline
|
97 |
+
v2-dropdown-entry" href="https://discuss.huggingface.co/" target="_blank">
|
98 |
+
|
99 |
+
Forum</a></li>
|
100 |
+
<li><a class="flex items-center hover:bg-gray-50 dark:hover:bg-gray-800 cursor-pointer px-3 py-1.5 whitespace-nowrap
|
101 |
+
|
102 |
+
hover:underline
|
103 |
+
v2-dropdown-entry" href="/blog">
|
104 |
+
|
105 |
+
Blog</a></li>
|
106 |
+
<li><a class="flex items-center hover:bg-gray-50 dark:hover:bg-gray-800 cursor-pointer px-3 py-1.5 whitespace-nowrap
|
107 |
+
|
108 |
+
hover:underline
|
109 |
+
v2-dropdown-entry" href="/join/discord" target="_blank">
|
110 |
+
|
111 |
+
Discord</a></li>
|
112 |
+
<li><a class="flex items-center hover:bg-gray-50 dark:hover:bg-gray-800 cursor-pointer px-3 py-1.5 whitespace-nowrap
|
113 |
+
|
114 |
+
hover:underline
|
115 |
+
v2-dropdown-entry" href="/course">
|
116 |
+
|
117 |
+
Course</a></li></ul></li>
|
118 |
+
<li><div class="col-span-full px-4 py-0.5 flex items-center justify-between font-semibold bg-gradient-to-r to-white dark:to-gray-925 whitespace-nowrap text-green-800 from-green-50 dark:text-green-100 dark:from-green-900">Documentation
|
119 |
+
</div>
|
120 |
+
<ul><li><a class="flex items-center hover:bg-gray-50 dark:hover:bg-gray-800 cursor-pointer px-3 py-1.5 whitespace-nowrap
|
121 |
+
|
122 |
+
hover:underline
|
123 |
+
v2-dropdown-entry" href="/docs">
|
124 |
+
|
125 |
+
Doc Search</a></li>
|
126 |
+
<li><a class="flex items-center hover:bg-gray-50 dark:hover:bg-gray-800 cursor-pointer px-3 py-1.5 whitespace-nowrap
|
127 |
+
|
128 |
+
hover:underline
|
129 |
+
v2-dropdown-entry" href="/docs/hub">
|
130 |
+
|
131 |
+
Hub doc</a></li>
|
132 |
+
<li><a class="flex items-center hover:bg-gray-50 dark:hover:bg-gray-800 cursor-pointer px-3 py-1.5 whitespace-nowrap
|
133 |
+
|
134 |
+
hover:underline
|
135 |
+
v2-dropdown-entry" href="/docs/accelerate/">
|
136 |
+
|
137 |
+
Accelerate doc</a></li>
|
138 |
+
<li><a class="flex items-center hover:bg-gray-50 dark:hover:bg-gray-800 cursor-pointer px-3 py-1.5 whitespace-nowrap
|
139 |
+
|
140 |
+
hover:underline
|
141 |
+
v2-dropdown-entry" href="/docs/autonlp/">
|
142 |
+
|
143 |
+
AutoNLP doc</a></li>
|
144 |
+
<li><a class="flex items-center hover:bg-gray-50 dark:hover:bg-gray-800 cursor-pointer px-3 py-1.5 whitespace-nowrap
|
145 |
+
|
146 |
+
hover:underline
|
147 |
+
v2-dropdown-entry" href="/docs/datasets/">
|
148 |
+
|
149 |
+
Datasets doc</a></li>
|
150 |
+
<li><a class="flex items-center hover:bg-gray-50 dark:hover:bg-gray-800 cursor-pointer px-3 py-1.5 whitespace-nowrap
|
151 |
+
|
152 |
+
hover:underline
|
153 |
+
v2-dropdown-entry" href="https://api-inference.huggingface.co/docs/" target="_blank">
|
154 |
+
|
155 |
+
Inference API doc</a></li>
|
156 |
+
<li><a class="flex items-center hover:bg-gray-50 dark:hover:bg-gray-800 cursor-pointer px-3 py-1.5 whitespace-nowrap
|
157 |
+
|
158 |
+
hover:underline
|
159 |
+
v2-dropdown-entry" href="/docs/sagemaker">
|
160 |
+
|
161 |
+
SageMaker doc</a></li>
|
162 |
+
<li><a class="flex items-center hover:bg-gray-50 dark:hover:bg-gray-800 cursor-pointer px-3 py-1.5 whitespace-nowrap
|
163 |
+
|
164 |
+
hover:underline
|
165 |
+
v2-dropdown-entry" href="/docs/tokenizers/python/latest/">
|
166 |
+
|
167 |
+
Tokenizers doc</a></li>
|
168 |
+
<li><a class="flex items-center hover:bg-gray-50 dark:hover:bg-gray-800 cursor-pointer px-3 py-1.5 whitespace-nowrap
|
169 |
+
|
170 |
+
hover:underline
|
171 |
+
v2-dropdown-entry" href="/docs/transformers/">
|
172 |
+
|
173 |
+
Transformers doc</a></li></ul></li>
|
174 |
+
</ul></div>
|
175 |
+
</div></li>
|
176 |
+
|
177 |
+
<li><div class="relative v2-dropdown">
|
178 |
+
<button class="
|
179 |
+
px-2 py-0.5 group hover:text-green-700 dark:hover:text-gray-400 flex items-center
|
180 |
+
|
181 |
+
v2-dropdown-button" type="button">
|
182 |
+
<svg class="mr-1.5 text-gray-400 group-hover:text-green-500" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 24 24"><path class="uim-tertiary" d="M19 6H5a3 3 0 0 0-3 3v2.72L8.837 14h6.326L22 11.72V9a3 3 0 0 0-3-3z" opacity=".5" fill="currentColor"></path><path class="uim-primary" d="M10 6V5h4v1h2V5a2.002 2.002 0 0 0-2-2h-4a2.002 2.002 0 0 0-2 2v1h2zm-1.163 8L2 11.72V18a3.003 3.003 0 0 0 3 3h14a3.003 3.003 0 0 0 3-3v-6.28L15.163 14H8.837z" fill="currentColor"></path></svg>
|
183 |
+
Solutions
|
184 |
+
</button>
|
185 |
+
|
186 |
+
|
187 |
+
<div class="absolute top-full mt-1 min-w-full w-auto bg-white rounded-xl overflow-hidden shadow-lg z-10 border border-gray-100
|
188 |
+
left-0
|
189 |
+
!w-64 v2-dropdown-menu hidden"><ul class="min-w-full w-auto">
|
190 |
+
<li><ul><li><a href="/support" data-ga-category="header-menu" data-ga-action="clicked support" data-ga-label="premium support" class="flex items-center group hover:bg-gradient-to-r hover:from-gray-100 p-2 w-80 dark:hover:from-gray-800"><div class="h-9 w-9 bg-gradient-to-tr dark:bg-gray-800 bg-gray-100 group-hover:bg-white dark:group-hover:bg-black rounded mr-1.5 flex items-center justify-center flex-none"><svg class="text-lg text-gray-500 group-hover:text-gray-600 dark:group-hover:text-gray-400" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 25 24"><path d="M12.6213 22.4475C12.7096 22.4825 12.8038 22.5003 12.8988 22.5C13.068 22.4991 13.2318 22.4409 13.3638 22.335L17.1138 19.335C17.2022 19.2652 17.2737 19.1763 17.323 19.0751C17.3724 18.9738 17.3983 18.8627 17.3988 18.75V13.0575L20.2338 10.23C21.0025 9.46567 21.6117 8.5563 22.0263 7.55467C22.4409 6.55304 22.6524 5.47907 22.6488 4.39505V3.00005C22.6488 2.60222 22.4908 2.22069 22.2095 1.93939C21.9281 1.65808 21.5466 1.50005 21.1488 1.50005H19.7538C18.6698 1.49639 17.5958 1.70798 16.5942 2.12254C15.5925 2.5371 14.6832 3.14638 13.9188 3.91505L11.0913 6.75005H5.39879C5.28552 6.75146 5.17405 6.77851 5.07273 6.82917C4.97141 6.87983 4.88288 6.95278 4.81379 7.04255L1.81379 10.7925C1.73008 10.8963 1.67553 11.0205 1.65576 11.1523C1.63599 11.2841 1.6517 11.4188 1.70129 11.5425C1.75027 11.6654 1.83088 11.7732 1.93494 11.8548C2.03899 11.9365 2.1628 11.9892 2.29379 12.0075L7.54379 12.7575L11.4063 16.605L12.1563 21.855C12.1747 21.986 12.2274 22.1098 12.309 22.2139C12.3907 22.318 12.4984 22.3986 12.6213 22.4475Z" opacity=".5" fill="currentColor"></path><path d="M11.0837 10.9416L5.08569 16.9396L7.20363 19.0576L13.2017 13.0595L11.0837 10.9416Z" opacity="1" fill="currentColor"></path></svg></div>
|
191 |
+
<div class="font-medium leading-tight">Expert Support
|
192 |
+
<p class="text-sm font-normal text-gray-400 whitespace-nowrap">Accelerate your ML roadmap</p>
|
193 |
+
</div></a>
|
194 |
+
</li><li><a href="/inference-api" data-ga-category="header-menu" data-ga-action="clicked inference api" data-ga-label="accelerated inference" class="flex items-center group hover:bg-gradient-to-r hover:from-gray-100 p-2 w-80 dark:hover:from-gray-800"><div class="h-9 w-9 bg-gradient-to-tr dark:bg-gray-800 bg-gray-100 group-hover:bg-white dark:group-hover:bg-black rounded mr-1.5 flex items-center justify-center flex-none"><svg class="text-lg text-gray-500 group-hover:text-gray-600 dark:group-hover:text-gray-400" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M8 9H4a2 2 0 0 0-2 2v12h2v-5h4v5h2V11a2 2 0 0 0-2-2zm-4 7v-5h4v5z" fill="currentColor"></path><path d="M22 11h3v10h-3v2h8v-2h-3V11h3V9h-8v2z" fill="currentColor"></path><path d="M14 23h-2V9h6a2 2 0 0 1 2 2v5a2 2 0 0 1-2 2h-4zm0-7h4v-5h-4z" fill="currentColor"></path></svg></div>
|
195 |
+
<div class="font-medium leading-tight">Inference API
|
196 |
+
<p class="text-sm font-normal text-gray-400 whitespace-nowrap">Use +20k models via API calls</p>
|
197 |
+
</div></a>
|
198 |
+
</li><li><a href="/autonlp" data-ga-category="header-menu" data-ga-action="clicked autonlp" data-ga-label="autonlp" class="flex items-center group hover:bg-gradient-to-r hover:from-gray-100 p-2 w-80 dark:hover:from-gray-800"><div class="h-9 w-9 bg-gradient-to-tr dark:bg-gray-800 bg-gray-100 group-hover:bg-white dark:group-hover:bg-black rounded mr-1.5 flex items-center justify-center flex-none"><svg class="text-lg text-gray-500 group-hover:text-gray-600 dark:group-hover:text-gray-400" viewBox="0 0 327 270" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" style="transform: rotate(360deg);"><path fill-rule="evenodd" clip-rule="evenodd" d="M51.963 85.696L51.9843 85.7142L52.0059 85.732C55.1982 88.3543 59.2834 89.5397 63.9988 89.5397H78.4986V69.3054H70.5681V40.5348C70.5681 30.8859 67.5907 23.2435 61.2359 18.1491C55.13 13.0873 46.7983 10.7302 36.5895 10.7302C28.2498 10.7302 21.1616 12.1163 15.4724 15.0602C10.0168 17.8391 5.83986 21.4966 3.12193 26.0961L2.00182 27.9917L17.1258 41.4352L18.8589 39.0419C20.6131 36.6193 22.6796 34.6836 25.06 33.2061C27.1956 31.8806 30.1823 31.1037 34.2242 31.1037C38.6379 31.1037 41.027 32.1553 42.2298 33.5299L42.2688 33.5745L42.3098 33.6174C43.6986 35.0693 44.6292 37.486 44.6292 41.3696V42.3135H35.7547C24.9484 42.3135 16.3129 44.149 10.2104 48.1829C3.87027 52.3738 0.802246 58.8507 0.802246 67.1092C0.802246 74.4698 3.09622 80.537 7.93618 84.9467L7.95908 84.9676L7.98247 84.9879C12.8409 89.2126 19.125 91.2093 26.5719 91.2093C32.519 91.2093 37.7242 89.9194 42.0438 87.1839C44.6415 85.5735 46.7327 83.5117 48.3183 81.0336C49.2175 82.8133 50.4256 84.3783 51.963 85.696ZM49.1349 74.4024C49.1268 74.3369 49.119 74.2711 49.1115 74.205C49.1158 74.2426 49.1201 74.2801 49.1246 74.3175C49.128 74.3458 49.1314 74.3741 49.1349 74.4024ZM42.0076 70.2099C40.0307 71.601 37.5325 72.3663 34.3634 72.3663C31.4475 72.3663 29.4241 71.7802 28.0549 70.8674C26.9105 70.1045 26.3237 69.0722 26.3237 67.3875V64.6048C26.3237 62.5484 27.028 61.3156 28.304 60.4603C29.7696 59.4779 32.3054 58.7913 36.3112 58.7913H44.6292V64.744C44.6292 67.3052 43.7283 68.999 42.0076 70.2099ZM101.033 89.3317L101.067 89.346L101.102 89.3594C104.318 90.6044 107.803 91.2093 111.528 91.2093C117.713 91.2093 122.939 89.6592 126.894 86.2606C128.359 85.0311 129.675 83.697 130.838 82.2601V89.5397H156.776V12.3998H130.838V62.2396C130.838 63.6201 130.55 64.6767 130.077 65.5045C129.516 66.4857 128.783 67.3276 127.859 68.041C127.056 68.6406 126.032 69.1518 124.739 69.5376C123.49 69.8452 122.196 70.001 120.85 70.001C117.332 70.001 115.151 69.0073 113.789 67.4414C112.364 65.8044 111.419 63.0712 111.419 58.7612V12.3998H85.4801V61.5439C85.4801 66.125 86.0642 70.2695 87.2869 73.9374L87.2973 73.9689L87.3086 74C88.6115 77.608 90.3932 80.7401 92.6858 83.3453C94.9963 85.9709 97.7881 87.9709 101.033 89.3317ZM166.262 12.3998V33.7473H185.741V66.1353C185.741 72.7418 187.742 78.3845 191.881 82.8418C196.179 87.4702 202.536 89.5397 210.397 89.5397H239.645V68.1923H211.68V33.7473H239.645V12.3998H211.68V0.359375H187.41V6.58626C187.41 9.65572 186.762 10.8926 186.317 11.3055C185.766 11.8168 184.435 12.3998 181.597 12.3998H166.262ZM272.247 88.5173L272.27 88.5262L272.293 88.5347C277.091 90.3342 282.425 91.2093 288.258 91.2093C294.09 91.2093 299.424 90.3342 304.223 88.5347L304.246 88.5262L304.268 88.5173C309.051 86.6239 313.139 83.9115 316.486 80.3663C319.921 76.8262 322.52 72.5541 324.304 67.5974C326.096 62.6209 326.967 57.0662 
326.967 50.9698C326.967 44.8733 326.096 39.3187 324.304 34.3421C322.52 29.3855 319.921 25.1134 316.486 21.5733C313.135 18.0231 309.038 15.3534 304.246 13.5525C299.439 11.6538 294.097 10.7302 288.258 10.7302C282.418 10.7302 277.076 11.6538 272.27 13.5525C267.477 15.3534 263.342 18.0202 259.905 21.5585L259.891 21.5728L259.877 21.5873C256.539 25.1276 253.992 29.3954 252.211 34.3421C250.42 39.3187 249.549 44.8733 249.549 50.9698C249.549 57.0662 250.42 62.6209 252.211 67.5974C253.992 72.5442 256.539 76.812 259.877 80.3523L259.891 80.3668L259.905 80.3811C263.337 83.9145 267.464 86.6239 272.247 88.5173ZM297.126 67.1259C295.135 69.3301 292.301 70.5576 288.258 70.5576C284.214 70.5576 281.38 69.3301 279.389 67.1259C277.376 64.8968 276.183 61.5428 276.183 56.6742V45.2653C276.183 40.3968 277.376 37.0428 279.389 34.8136C281.38 32.6095 284.214 31.382 288.258 31.382C292.301 31.382 295.135 32.6095 297.126 34.8136C299.14 37.0428 300.332 40.3968 300.332 45.2653V56.6742C300.332 61.5428 299.14 64.8968 297.126 67.1259Z" fill="currentColor"></path><path fill-rule="evenodd" clip-rule="evenodd" d="M24.1832 151.837C24.1832 149.255 26.2767 147.161 28.8594 147.161H73.3409C73.3027 147.546 73.2833 147.936 73.2833 148.33V185.74C73.2833 192.196 78.5171 197.43 84.9737 197.43C91.4303 197.43 96.6642 192.196 96.6642 185.74V148.33C96.6642 147.936 96.6448 147.546 96.6066 147.161H117.511C116.885 148.593 116.538 150.175 116.538 151.837V246.53H65.0999V202.107C65.0999 195.65 59.866 190.416 53.4094 190.416C46.9528 190.416 41.7189 195.65 41.7189 202.107V246.53H28.8594C26.2767 246.53 24.1832 244.437 24.1832 241.854V151.837ZM139.919 246.53H234.873C234.702 245.778 234.612 244.996 234.612 244.192C234.612 238.381 229.901 233.671 224.091 233.671H193.695C183.365 233.671 174.99 225.296 174.99 214.966V147.161H138.946C139.572 148.593 139.919 150.175 139.919 151.837V246.53ZM298.91 246.53H257.458C257.805 245.423 257.993 244.245 257.993 243.023V228.995C257.993 218.664 249.619 210.29 239.288 210.29H217.076C206.746 210.29 198.371 201.915 198.371 191.585V147.161H298.91C301.492 147.161 303.586 149.255 303.586 151.837V241.854C303.586 244.437 301.492 246.53 298.91 246.53ZM28.8594 123.78C13.3638 123.78 0.802246 136.342 0.802246 151.837V241.854C0.802246 257.35 13.3638 269.911 28.8594 269.911H298.91C314.405 269.911 326.967 257.35 326.967 241.854V151.837C326.967 136.342 314.405 123.78 298.91 123.78H28.8594ZM240.457 162.359C231.418 162.359 224.091 169.686 224.091 178.726C224.091 187.765 231.418 195.092 240.457 195.092C249.496 195.092 256.824 187.765 256.824 178.726C256.824 169.686 249.496 162.359 240.457 162.359Z" fill="currentColor"></path></svg></div>
|
199 |
+
<div class="font-medium leading-tight">AutoNLP
|
200 |
+
<p class="text-sm font-normal text-gray-400 whitespace-nowrap">Create ML models without code</p>
|
201 |
+
</div></a>
|
202 |
+
</li><li><a href="/infinity" data-ga-category="header-menu" data-ga-action="clicked infinity" data-ga-label="infinity" class="flex items-center group hover:bg-gradient-to-r hover:from-gray-100 p-2 w-80 dark:hover:from-gray-800"><div class="h-9 w-9 bg-gradient-to-tr dark:bg-gray-800 bg-gray-100 group-hover:bg-white dark:group-hover:bg-black rounded mr-1.5 flex items-center justify-center flex-none"><svg class="text-lg text-gray-500 group-hover:text-gray-600 dark:group-hover:text-gray-400" width="1em" height="1em" viewBox="0 0 349 155" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img"><path fill-rule="evenodd" clip-rule="evenodd" d="M77.4254 42.0939C58.0156 42.0939 42.2809 57.799 42.2809 77.1722C42.2809 96.5454 58.0156 112.25 77.4254 112.25V154.344C34.7239 154.344 0.107422 119.793 0.107422 77.1722C0.107422 34.5512 34.7239 0 77.4254 0C116.684 0 144.788 19.3459 167.187 40.3015L137.675 70.504C118.96 53.1048 101.389 42.0939 77.4254 42.0939ZM181.033 114.057C203.574 135.043 231.897 154.344 271.531 154.344V112.25C247.156 112.25 229.306 101.201 210.542 83.8571L181.033 114.057Z" fill="currentColor" opacity="0.5"></path><path fill-rule="evenodd" clip-rule="evenodd" d="M271.141 42.0939C290.551 42.0939 306.286 57.799 306.286 77.1722C306.286 96.5454 290.551 112.25 271.141 112.25V112.304C270.876 112.294 270.609 112.289 270.34 112.289C258.72 112.289 249.3 121.709 249.3 133.329C249.3 144.949 258.72 154.369 270.34 154.369C270.685 154.369 271.027 154.36 271.368 154.344C313.965 154.222 348.459 119.718 348.459 77.1722C348.459 34.5512 313.843 0 271.141 0C219.197 0 186.78 33.8682 161.269 60.5213C160.594 61.227 159.923 61.9276 159.257 62.6224C131.402 91.6825 110.291 112.25 77.0352 112.25V112.289C77.0352 112.289 77.0352 112.289 77.0351 112.289C65.4151 112.289 55.9951 121.709 55.9951 133.329C55.9951 144.949 65.4151 154.369 77.0351 154.369C77.4121 154.369 77.7867 154.359 78.1587 154.339C130.221 153.858 162.646 120.001 188.168 93.3526L188.213 93.306L188.262 93.255C188.754 92.7412 189.243 92.2301 189.73 91.722C217.708 62.5338 238.492 42.0939 271.141 42.0939Z" fill="currentColor"></path></svg></div>
|
203 |
+
<div class="font-medium leading-tight">Infinity
|
204 |
+
<p class="text-sm font-normal text-gray-400 whitespace-nowrap">Optimize to 1ms latency</p>
|
205 |
+
</div></a>
|
206 |
+
</li><li><a href="/hardware" data-ga-category="header-menu" data-ga-action="clicked hardware" data-ga-label="hardware" class="flex items-center group hover:bg-gradient-to-r hover:from-gray-100 p-2 w-80 dark:hover:from-gray-800"><div class="h-9 w-9 bg-gradient-to-tr dark:bg-gray-800 bg-gray-100 group-hover:bg-white dark:group-hover:bg-black rounded mr-1.5 flex items-center justify-center flex-none"><svg class="text-lg text-gray-500 group-hover:text-gray-600 dark:group-hover:text-gray-400" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 230 230"><path fill-rule="evenodd" clip-rule="evenodd" d="M196.384 70.1626V77.0991C196.378 80.4596 195.489 83.7596 193.808 86.6691C192.126 89.5785 189.71 91.9955 186.801 93.6783L123.359 130.287C120.724 131.532 117.839 132.184 114.926 132.203V106.1L193.797 60.4834C195.474 63.4351 196.365 66.7701 196.384 70.1626ZM196.384 111.562V118.499C196.378 121.859 195.489 125.159 193.808 128.068C192.126 130.978 189.71 133.395 186.801 135.078L123.359 171.686C120.724 172.932 117.839 173.584 114.926 173.603V147.499L193.797 101.883C195.474 104.834 196.365 108.169 196.384 111.562ZM196.384 159.897V152.96C196.365 149.568 195.474 146.233 193.797 143.281L114.926 188.898V215.001C117.839 214.982 120.724 214.33 123.359 213.084L186.801 176.476C189.71 174.793 192.126 172.376 193.808 169.467C195.489 166.557 196.378 163.257 196.384 159.897Z" fill="currentColor"></path><path opacity="0.25" fill-rule="evenodd" clip-rule="evenodd" d="M193.605 60.4489L160.32 79.7003L186.897 95.0442C189.676 96.7021 191.985 99.05 193.605 101.848L160.32 121.099L186.897 136.443C189.676 138.101 191.985 140.448 193.605 143.247L114.734 188.863L35.8633 143.247C37.4824 140.444 39.7917 138.101 42.5716 136.443L44.3827 135.397L106.355 171.226C109.073 172.546 112.054 173.234 115.075 173.239V147.499L36.2047 101.883C36.1814 101.924 36.1583 101.965 36.1353 102.006L35.8633 101.848C37.4824 99.0452 39.7917 96.7029 42.5716 95.0442L44.3836 93.9981L106.355 129.827C109.073 131.147 112.054 131.835 115.075 131.839V106.1L36.2047 60.4834C36.1814 60.5243 36.1583 60.5652 36.1353 60.6063L35.8633 60.4489C37.4824 57.6458 39.7917 55.3035 42.5716 53.6448L105.151 17.5156C111.092 14.1615 118.376 14.1615 124.317 17.5156L186.897 53.6448C189.676 55.3027 191.985 57.6506 193.605 60.4489Z" fill="currentColor"></path><path opacity="0.5" fill-rule="evenodd" clip-rule="evenodd" d="M115.075 131.435V105.696L36.2047 60.0791C34.5242 63.0293 33.633 66.3631 33.6172 69.7583V76.3307C33.6235 79.6912 34.512 82.9912 36.1938 85.9006C37.8755 88.8101 40.2917 91.227 43.2005 92.9098L106.355 129.422C109.073 130.742 112.054 131.43 115.075 131.435ZM115.075 147.499V173.239C112.054 173.234 109.073 172.546 106.355 171.226L43.2005 134.714C40.2917 133.031 37.8755 130.614 36.1938 127.704C34.512 124.795 33.6235 121.495 33.6172 118.134V111.562C33.633 108.167 34.5242 104.833 36.2047 101.883L115.075 147.499ZM115.075 188.898V214.637C112.054 214.633 109.073 213.945 106.355 212.624L43.2005 176.112C40.2917 174.429 37.8755 172.012 36.1938 169.103C34.512 166.193 33.6235 162.893 33.6172 159.533V152.96C33.633 149.565 34.5242 146.231 36.2047 143.281L115.075 188.898Z" fill="currentColor"></path></svg></div>
|
207 |
+
<div class="font-medium leading-tight">Hardware
|
208 |
+
<p class="text-sm font-normal text-gray-400 whitespace-nowrap">Scale with dedicated hardware</p>
|
209 |
+
</div></a>
|
210 |
+
</li><li><a href="/platform" data-ga-category="header-menu" data-ga-action="clicked platform" data-ga-label="platform" class="flex items-center group hover:bg-gradient-to-r hover:from-gray-100 p-2 w-80 dark:hover:from-gray-800"><div class="h-9 w-9 bg-gradient-to-tr dark:bg-gray-800 bg-gray-100 group-hover:bg-white dark:group-hover:bg-black rounded mr-1.5 flex items-center justify-center flex-none"><svg class="text-lg text-gray-500 group-hover:text-gray-600 dark:group-hover:text-gray-400" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 24 24"><path class="uim-quaternary" d="M20.23 7.24L12 12L3.77 7.24a1.98 1.98 0 0 1 .7-.71L11 2.76c.62-.35 1.38-.35 2 0l6.53 3.77c.29.173.531.418.7.71z" opacity=".25" fill="currentColor"></path><path class="uim-tertiary" d="M12 12v9.5a2.09 2.09 0 0 1-.91-.21L4.5 17.48a2.003 2.003 0 0 1-1-1.73v-7.5a2.06 2.06 0 0 1 .27-1.01L12 12z" opacity=".5" fill="currentColor"></path><path class="uim-primary" d="M20.5 8.25v7.5a2.003 2.003 0 0 1-1 1.73l-6.62 3.82c-.275.13-.576.198-.88.2V12l8.23-4.76c.175.308.268.656.27 1.01z" fill="currentColor"></path></svg></div>
|
211 |
+
<div class="font-medium leading-tight">Platform
|
212 |
+
<p class="text-sm font-normal text-gray-400 whitespace-nowrap">Collaborate better on ML</p>
|
213 |
+
</div></a>
|
214 |
+
</li></ul></li>
|
215 |
+
</ul></div>
|
216 |
+
</div></li>
|
217 |
+
|
218 |
+
<li><a class="flex items-center group px-2 py-0.5 hover:text-gray-500 dark:hover:text-gray-400" href="/pricing" data-ga-category="header-menu" data-ga-action="clicked pricing" data-ga-label="pricing">Pricing
|
219 |
+
</a></li>
|
220 |
+
<li><hr class="mx-2 w-0.5 h-5 border-none bg-gray-100 dark:bg-gray-800"></li>
|
221 |
+
<li><a class="px-2 py-0.5 block cursor-pointer hover:text-gray-500 dark:hover:text-gray-400" href="/login">Log In
|
222 |
+
</a></li>
|
223 |
+
<li><a class="ml-2 btn" href="/join">Sign Up </a></li></ul></nav></div></header>
|
224 |
+
|
225 |
+
|
226 |
+
<main class="flex flex-col flex-1 "><header class="bg-gradient-to-t from-gray-50-to-white via-white dark:via-gray-950 pt-10 "><div class="container relative"><h1 class="flex items-center flex-wrap text-lg leading-tight mb-2 md:text-xl ">
|
227 |
+
<div class="flex items-center mb-1"><img class="w-4 h-4 mr-1.5 rounded" alt="Flax Community's picture" src="https://aeiljuispo.cloudimg.io/v7/https://s3.amazonaws.com/moonup/production/uploads/1624969772076-5dfcb1aada6d0311fd3d5448.jpeg?w=200&h=200&f=face">
|
228 |
+
<a href="/flax-community" class="font-sans text-gray-400 hover:text-blue-600">flax-community</a>
|
229 |
+
<div class="text-gray-300 mx-0.5">/</div></div>
|
230 |
+
<div class="mb-1"><a class="font-mono font-semibold" href="/flax-community/gpt2-medium-indonesian">gpt2-medium-indonesian</a>
|
231 |
+
<div class="SVELTE_HYDRATER inline mr-5" data-props="{"label":"objectInfo name","noText":true,"value":"flax-community/gpt2-medium-indonesian"}" data-target="CopyButton"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none
|
232 |
+
|
233 |
+
mx-0.5
|
234 |
+
text-gray-600
|
235 |
+
|
236 |
+
" title="Copy objectInfo name to clipboard" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg>
|
237 |
+
|
238 |
+
<div class="
|
239 |
+
absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow
|
240 |
+
left-1/2 top-full transform -translate-x-1/2 translate-y-2
|
241 |
+
opacity-0
|
242 |
+
"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="
|
243 |
+
border-left-color: transparent;
|
244 |
+
border-right-color: transparent;
|
245 |
+
"></div>
|
246 |
+
Copied</div></button></div></div>
|
247 |
+
<div class="SVELTE_HYDRATER mr-5 mb-1" data-props="{"isLikedByUser":false,"likes":0,"repoId":"flax-community/gpt2-medium-indonesian","repoType":"model"}" data-target="LikeButton"><div class="inline-flex items-center border leading-none whitespace-nowrap text-sm rounded-md text-gray-500 overflow-hidden bg-white
|
248 |
+
"><button class="relative flex items-center px-1.5 py-1 hover:bg-gradient-to-t focus:outline-none from-red-50 to-transparent dark:from-red-900 dark:to-red-800 overflow-hidden" title="Like"><svg class="mr-1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32" fill="currentColor"><path d="M22.45,6a5.47,5.47,0,0,1,3.91,1.64,5.7,5.7,0,0,1,0,8L16,26.13,5.64,15.64a5.7,5.7,0,0,1,0-8,5.48,5.48,0,0,1,7.82,0L16,10.24l2.53-2.58A5.44,5.44,0,0,1,22.45,6m0-2a7.47,7.47,0,0,0-5.34,2.24L16,7.36,14.89,6.24a7.49,7.49,0,0,0-10.68,0,7.72,7.72,0,0,0,0,10.82L16,29,27.79,17.06a7.72,7.72,0,0,0,0-10.82A7.49,7.49,0,0,0,22.45,4Z"></path></svg>
|
249 |
+
|
250 |
+
<svg class="mr-1 absolute text-red-500 origin-center transform transition ease-in\n\t\t\t\ttranslate-y-10 scale-0" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32" fill="currentColor"><path d="M22.5,4c-2,0-3.9,0.8-5.3,2.2L16,7.4l-1.1-1.1C12,3.3,7.2,3.3,4.3,6.2c0,0-0.1,0.1-0.1,0.1c-3,3-3,7.8,0,10.8L16,29l11.8-11.9c3-3,3-7.8,0-10.8C26.4,4.8,24.5,4,22.5,4z"></path></svg>
|
251 |
+
like
|
252 |
+
</button>
|
253 |
+
<button class="flex items-center px-1.5 py-1 border-l text-gray-400 focus:outline-none hover:bg-gray-50 dark:hover:bg-gray-700 focus:bg-gray-100 " title="See users who liked this repository">0</button></div>
|
254 |
+
</div>
|
255 |
+
</h1>
|
256 |
+
<div class="SVELTE_HYDRATER " data-props="{"tagObjs":[{"id":"text-generation","label":"Text Generation","subType":"nlp","type":"pipeline_tag"},{"id":"pytorch","label":"PyTorch","type":"library"},{"id":"jax","label":"JAX","type":"library"},{"id":"tensorboard","label":"TensorBoard","type":"library"},{"id":"transformers","label":"Transformers","type":"library"},{"id":"id","label":"id","type":"language"},{"id":"gpt2","label":"gpt2","type":"other"}]}" data-target="ModelHeaderTags"><div class="flex flex-wrap mb-3 lg:mb-5"><a class="tag
|
257 |
+
tag-white" href="/models?pipeline_tag=text-generation">
|
258 |
+
<div class="tag-ico tag-ico-indigo"><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 18 18"><path d="M16.2607 8.08202L14.468 6.28928C14.3063 6.12804 14.0873 6.03749 13.859 6.03749C13.6307 6.03749 13.4117 6.12804 13.25 6.28928L5.6375 13.904V16.9125H8.64607L16.2607 9.30002C16.422 9.13836 16.5125 8.91935 16.5125 8.69102C16.5125 8.4627 16.422 8.24369 16.2607 8.08202V8.08202ZM8.1953 15.825H6.725V14.3547L11.858 9.22118L13.3288 10.6915L8.1953 15.825ZM14.0982 9.92262L12.6279 8.45232L13.8606 7.21964L15.3309 8.68994L14.0982 9.92262Z"></path><path d="M6.18125 9.84373H7.26875V6.03748H8.9V4.94998H4.55V6.03748H6.18125V9.84373Z"></path><path d="M4.55 11.475H2.375V2.775H11.075V4.95H12.1625V2.775C12.1625 2.48658 12.0479 2.20997 11.844 2.00602C11.64 1.80208 11.3634 1.6875 11.075 1.6875H2.375C2.08658 1.6875 1.80997 1.80208 1.60602 2.00602C1.40207 2.20997 1.2875 2.48658 1.2875 2.775V11.475C1.2875 11.7634 1.40207 12.04 1.60602 12.244C1.80997 12.4479 2.08658 12.5625 2.375 12.5625H4.55V11.475Z"></path></svg></div>
|
259 |
+
<span>Text Generation</span>
|
260 |
+
</a><a class="tag
|
261 |
+
tag-red" href="/models?library=pytorch"><svg class="inline-block ml-2 text-sm" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width="0.83em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 310"><path d="M218.281 90.106c50.292 50.292 50.292 130.969 0 181.61c-49.244 50.292-130.27 50.292-180.562 0s-50.292-131.318 0-181.61L127.825 0v45.053l-8.382 8.382l-59.721 59.722c-37.72 37.02-37.72 97.79 0 135.509c37.02 37.719 97.79 37.719 135.509 0c37.719-37.02 37.719-97.79 0-135.51l23.05-23.05zm-45.053-5.588c-9.259 0-16.764-7.505-16.764-16.764c0-9.258 7.505-16.764 16.764-16.764c9.258 0 16.764 7.506 16.764 16.764c0 9.259-7.506 16.764-16.764 16.764z" fill="#EE4C2C"></path></svg>
|
262 |
+
<span>PyTorch</span>
|
263 |
+
</a><a class="tag
|
264 |
+
tag-red" href="/models?library=jax"><svg class="inline-block ml-2 text-sm" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width="1.73em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 451 260.81"><style>.J {
|
265 |
+
stroke: #dce0df;
|
266 |
+
}
|
267 |
+
.K {
|
268 |
+
stroke-linejoin: round;
|
269 |
+
}
|
270 |
+
</style><g fill="#5e97f6" class="J K"><path d="M50.5 130.4l-25 43.31h50l25-43.31h-50z"></path><path d="M.5 217.01l25-43.3h50l-25 43.3H.5z"></path><path d="M125.5 173.71h-50l-25 43.3h50l25-43.3z"></path><path d="M175.5 173.71h-50l-25 43.3h50l25-43.3z"></path><path d="M150.5 130.4l-25 43.31h50l25-43.31h-50z"></path><path d="M175.5 87.1l-25 43.3h50l25-43.3h-50z"></path><path d="M200.5 43.8l-25 43.3h50l25-43.3h-50z"></path><path d="M225.5.5l-25 43.3h50l25-43.3h-50z"></path></g><g fill="#2a56c6" class="J K"><path d="M.5 217.01l25 43.3h50l-25-43.3H.5z"></path><path d="M125.5 260.31h-50l-25-43.3h50l25 43.3z"></path><path d="M175.5 260.31h-50l-25-43.3h50l25 43.3z"></path></g><g fill="#00796b" class="J K"><path d="M200.5 217.01l-25-43.3-25 43.3 25 43.3 25-43.3zm50-86.61l-25-43.3-25 43.3h50z"></path><path d="M250.5 43.8l-25 43.3 25 43.3 25-43.3-25-43.3z"></path></g><path d="M125.5 173.71l-25-43.31-25 43.31h50z" fill="#3367d6" class="J K"></path><g fill="#26a69a" class="J K"><path d="M250.5 130.4h-50l-25 43.31h50l25-43.31z"></path><path d="M300.5 130.4h-50l-25 43.31h50l25-43.31z"></path></g><g fill="#9c27b0" class="J K"><path d="M350.5 43.8L325.5.5l-25 43.3 25 43.3 25-43.3z"></path><path d="M375.5 87.1l-25-43.3-25 43.3 25 43.3 25-43.3z"></path><path d="M400.5 130.4l-25-43.3-25 43.3 25 43.31 25-43.31z"></path><path d="M425.5 173.71l-25-43.31-25 43.31 25 43.3 25-43.3z"></path><path d="M450.5 217.01l-25-43.3-25 43.3 25 43.3 25-43.3zM425.5.5l-25 43.3 25 43.3 25-43.3-25-43.3z"></path><path d="M375.5 87.1l25-43.3 25 43.3-25 43.3-25-43.3zm-25 43.3l-25 43.31 25 43.3 25-43.3-25-43.31z"></path><path d="M325.5 260.31l-25-43.3 25-43.3 25 43.3-25 43.3z"></path></g><path d="M275.5 260.31l-25-43.3h50l25 43.3h-50z" fill="#6a1b9a" class="J K"></path><g fill="#00695c" class="J K"><path d="M225.5 173.71h-50l25 43.3h50l-25-43.3z"></path><path d="M275.5 173.71h-50l25 43.3 25-43.3zm0-86.61l25 43.3h50l-25-43.3h-50z"></path><path d="M300.5 43.8h-50l25 43.3h50l-25-43.3zm125 216.51l-25-43.3h-50l25 43.3h50z"></path><path d="M375.5 173.71l-25 43.3h50l-25-43.3z"></path></g><g fill="#ea80fc" class="J K"><path d="M325.5.5h-50l-25 43.3h50l25-43.3zm0 173.21h-50l-25 43.3h50l25-43.3z"></path><path d="M350.5 130.4h-50l-25 43.31h50l25-43.31zM425.5.5h-50l-25 43.3h50l25-43.3z"></path><path d="M375.5 87.1l-25-43.3h50l-25 43.3z"></path></g></svg>
|
271 |
+
<span>JAX</span>
|
272 |
+
</a><a class="tag
|
273 |
+
tag-red" href="/models?library=tensorboard">
|
274 |
+
<span>TensorBoard</span>
|
275 |
+
</a><a class="tag
|
276 |
+
tag-red" href="/models?library=transformers">
|
277 |
+
<span>Transformers</span>
|
278 |
+
</a><a class="tag
|
279 |
+
tag-green" href="/models?language=id">
|
280 |
+
<span>id</span>
|
281 |
+
</a><a class="tag
|
282 |
+
tag-purple" href="/models?other=gpt2">
|
283 |
+
<span>gpt2</span>
|
284 |
+
</a></div></div>
|
285 |
+
<div class="border-b border-gray-100"><div class="flex flex-col-reverse lg:flex-row lg:items-center lg:justify-between"><div class="flex items-center h-12 -mb-px overflow-x-auto overflow-y-hidden"><a class="tab-alternate " href="/flax-community/gpt2-medium-indonesian"><svg class="mr-1.5 text-gray-400" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 24 24"><path class="uim-quaternary" d="M20.23 7.24L12 12L3.77 7.24a1.98 1.98 0 0 1 .7-.71L11 2.76c.62-.35 1.38-.35 2 0l6.53 3.77c.29.173.531.418.7.71z" opacity=".25" fill="currentColor"></path><path class="uim-tertiary" d="M12 12v9.5a2.09 2.09 0 0 1-.91-.21L4.5 17.48a2.003 2.003 0 0 1-1-1.73v-7.5a2.06 2.06 0 0 1 .27-1.01L12 12z" opacity=".5" fill="currentColor"></path><path class="uim-primary" d="M20.5 8.25v7.5a2.003 2.003 0 0 1-1 1.73l-6.62 3.82c-.275.13-.576.198-.88.2V12l8.23-4.76c.175.308.268.656.27 1.01z" fill="currentColor"></path></svg>
|
286 |
+
Model card
|
287 |
+
</a>
|
288 |
+
<a class="tab-alternate active" href="/flax-community/gpt2-medium-indonesian/tree/main"><svg class="mr-1.5 text-gray-400" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 24 24"><path class="uim-tertiary" d="M21 19h-8a1 1 0 0 1 0-2h8a1 1 0 0 1 0 2zm0-4h-8a1 1 0 0 1 0-2h8a1 1 0 0 1 0 2zm0-8h-8a1 1 0 0 1 0-2h8a1 1 0 0 1 0 2zm0 4h-8a1 1 0 0 1 0-2h8a1 1 0 0 1 0 2z" opacity=".5" fill="currentColor"></path><path class="uim-primary" d="M9 19a1 1 0 0 1-1-1V6a1 1 0 0 1 2 0v12a1 1 0 0 1-1 1zm-6-4.333a1 1 0 0 1-.64-1.769L3.438 12l-1.078-.898a1 1 0 0 1 1.28-1.538l2 1.667a1 1 0 0 1 0 1.538l-2 1.667a.999.999 0 0 1-.64.231z" fill="currentColor"></path></svg>
|
289 |
+
<span class="xl:hidden">Files</span>
|
290 |
+
<span class="hidden xl:inline">Files and versions</span></a>
|
291 |
+
<a class="tab-alternate " href="/flax-community/gpt2-medium-indonesian/tensorboard"><svg class="mr-1.5 text-gray-300 dark:text-gray-500 w-3" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid" viewBox="0 0 256 287"><path d="M133.446809,0 L256,69.7191489 L256,133.446809 L182.468085,90.4170213 L182.468085,122.553191 L218.961702,143.795745 L219.506383,198.808511 L182.468085,177.565957 L182.468085,258.178723 L133.446809,286.502128 L133.446809,0 Z M122.553191,0 L122.553191,286.502128 L73.5319149,258.178723 L73.5319149,90.4170213 L0,133.446809 L0,69.7191489 L122.553191,0 Z" fill="currentColor"></path></svg>
|
292 |
+
<span class="xl:hidden">Metrics</span>
|
293 |
+
<span class="hidden xl:inline">Training metrics</span></a>
|
294 |
+
</div>
|
295 |
+
<div class="SVELTE_HYDRATER " data-props="{"model":{"author":"flax-community","branch":"main","cardData":{"language":"id","widget":[{"text":"Sewindu sudah kita tak berjumpa, rinduku padamu sudah tak terkira."}]},"cardExists":true,"config":{"architectures":["GPT2LMHeadModel"],"model_type":"gpt2","task_specific_params":{"text-generation":{"do_sample":true,"max_length":50}}},"id":"flax-community/gpt2-medium-indonesian","lastModified":"2021-09-02T12:22:45.000Z","pipeline_tag":"text-generation","library_name":"transformers","model-index":null,"private":false,"gated":false,"pwcLink":{"error":"Unknown error, can't generate link to Papers With Code."},"siblings":[{"rfilename":".gitattributes"},{"rfilename":".gitignore"},{"rfilename":"README.md"},{"rfilename":"added_tokens.json"},{"rfilename":"config.json"},{"rfilename":"create_config.py"},{"rfilename":"create_tokenizer.py"},{"rfilename":"events.out.tfevents.1625840127.t1v-n-528d9406-w-0.245719.3.v2"},{"rfilename":"events.out.tfevents.1625843003.t1v-n-528d9406-w-0.250031.3.v2"},{"rfilename":"events.out.tfevents.1625892207.t1v-n-528d9406-w-0.296755.3.v2"},{"rfilename":"flax_model.msgpack"},{"rfilename":"jax2torch.py"},{"rfilename":"merges.txt"},{"rfilename":"pytorch_model.bin"},{"rfilename":"replace_token_script.py"},{"rfilename":"run_clm_flax.py"},{"rfilename":"run_finetuning.sh"},{"rfilename":"run_pretraining.sh"},{"rfilename":"special_tokens_map.json"},{"rfilename":"tokenizer.json"},{"rfilename":"tokenizer_config.json"},{"rfilename":"vocab.json"},{"rfilename":"text_collection/README.md"},{"rfilename":"text_collection/text_collection.py"}],"tags":["pytorch","jax","tensorboard","gpt2","text-generation","id","transformers"],"tag_objs":[{"id":"text-generation","label":"Text Generation","subType":"nlp","type":"pipeline_tag"},{"id":"pytorch","label":"PyTorch","type":"library"},{"id":"jax","label":"JAX","type":"library"},{"id":"tensorboard","label":"TensorBoard","type":"library"},{"id":"transformers","label":"Transformers","type":"library"},{"id":"id","label":"id","type":"language"},{"id":"gpt2","label":"gpt2","type":"other"}],"transformersInfo":{"auto_model":"AutoModelForCausalLM","pipeline_tag":"text-generation","processor":"AutoTokenizer"},"widgetData":[{"text":"Sewindu sudah kita tak berjumpa, rinduku padamu sudah tak terkira."}],"likes":0,"isLikedByUser":false}}" data-target="ModelHeaderActions">
|
296 |
+
|
297 |
+
|
298 |
+
<div class="relative mb-1.5 space-y-1 sm:flex sm:space-y-0 sm:space-x-1.5 lg:mb-0"><div><div class="relative ">
|
299 |
+
<button class="
|
300 |
+
text-sm btn
|
301 |
+
cursor-pointer w-full btn text-sm
|
302 |
+
" type="button">
|
303 |
+
<svg class="mr-1.5 " xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M12.1 2a9.8 9.8 0 0 0-5.4 1.6l6.4 6.4a2.1 2.1 0 0 1 .2 3a2.1 2.1 0 0 1-3-.2L3.7 6.4A9.84 9.84 0 0 0 2 12.1a10.14 10.14 0 0 0 10.1 10.1a10.9 10.9 0 0 0 2.6-.3l6.7 6.7a5 5 0 0 0 7.1-7.1l-6.7-6.7a10.9 10.9 0 0 0 .3-2.6A10 10 0 0 0 12.1 2zm8 10.1a7.61 7.61 0 0 1-.3 2.1l-.3 1.1l.8.8l6.7 6.7a2.88 2.88 0 0 1 .9 2.1A2.72 2.72 0 0 1 27 27a2.9 2.9 0 0 1-4.2 0l-6.7-6.7l-.8-.8l-1.1.3a7.61 7.61 0 0 1-2.1.3a8.27 8.27 0 0 1-5.7-2.3A7.63 7.63 0 0 1 4 12.1a8.33 8.33 0 0 1 .3-2.2l4.4 4.4a4.14 4.14 0 0 0 5.9.2a4.14 4.14 0 0 0-.2-5.9L10 4.2a6.45 6.45 0 0 1 2-.3a8.27 8.27 0 0 1 5.7 2.3a8.49 8.49 0 0 1 2.4 5.9z" fill="currentColor"></path></svg>
|
304 |
+
Train
|
305 |
+
<svg class="-mr-1 text-gray-500" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 24 24" style="transform: rotate(360deg);"><path d="M7 10l5 5l5-5z" fill="currentColor"></path></svg></button>
|
306 |
+
|
307 |
+
|
308 |
+
|
309 |
+
</div>
|
310 |
+
</div>
|
311 |
+
<div><div class="relative ">
|
312 |
+
<button class="
|
313 |
+
text-sm btn
|
314 |
+
cursor-pointer w-full btn text-sm
|
315 |
+
" type="button">
|
316 |
+
<svg class="mr-1.5 " xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><rect x="6.34" y="19" width="11.31" height="2" transform="translate(-10.63 14.34) rotate(-45)"></rect><path d="M17,30a1,1,0,0,1-.37-.07,1,1,0,0,1-.62-.79l-1-7,2-.28.75,5.27L21,24.52V17a1,1,0,0,1,.29-.71l4.07-4.07A8.94,8.94,0,0,0,28,5.86V4H26.14a8.94,8.94,0,0,0-6.36,2.64l-4.07,4.07A1,1,0,0,1,15,11H7.48L4.87,14.26l5.27.75-.28,2-7-1a1,1,0,0,1-.79-.62,1,1,0,0,1,.15-1l4-5A1,1,0,0,1,7,9h7.59l3.77-3.78A10.92,10.92,0,0,1,26.14,2H28a2,2,0,0,1,2,2V5.86a10.92,10.92,0,0,1-3.22,7.78L23,17.41V25a1,1,0,0,1-.38.78l-5,4A1,1,0,0,1,17,30Z"></path></svg>
|
317 |
+
Deploy
|
318 |
+
<svg class="-mr-1 text-gray-500" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 24 24" style="transform: rotate(360deg);"><path d="M7 10l5 5l5-5z" fill="currentColor"></path></svg></button>
|
319 |
+
|
320 |
+
|
321 |
+
|
322 |
+
</div>
|
323 |
+
</div>
|
324 |
+
<div class=" "><button class="cursor-pointer w-full
|
325 |
+
|
326 |
+
btn
|
327 |
+
text-sm" type="button" ><svg class="mr-1.5 " xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32" style="transform: rotate(360deg);"><path d="M31 16l-7 7l-1.41-1.41L28.17 16l-5.58-5.59L24 9l7 7z" fill="currentColor"></path><path d="M1 16l7-7l1.41 1.41L3.83 16l5.58 5.59L8 23l-7-7z" fill="currentColor"></path><path d="M12.419 25.484L17.639 6l1.932.518L14.35 26z" fill="currentColor"></path></svg>
|
328 |
+
Use in Transformers</button>
|
329 |
+
</div></div></div>
|
330 |
+
</div></div></div></header>
|
331 |
+
|
332 |
+
<div class="container relative flex flex-col md:grid md:space-y-0 w-full
|
333 |
+
md:grid-cols-12
|
334 |
+
|
335 |
+
space-y-4
|
336 |
+
md:gap-6
|
337 |
+
mb-16
|
338 |
+
"><section class="pt-8 border-gray-100 col-span-full"><header class="pb-2 flex items-center justify-between flex-wrap"><div class="flex flex-wrap items-center"><div class="relative mr-4 mb-2 v2-dropdown">
|
339 |
+
<button class="
|
340 |
+
text-base
|
341 |
+
cursor-pointer w-full btn text-sm
|
342 |
+
v2-dropdown-button" type="button">
|
343 |
+
<svg class="mr-1.5 text-gray-700 dark:text-gray-400" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 24 24" style="transform: rotate(360deg);"><path d="M13 14c-3.36 0-4.46 1.35-4.82 2.24C9.25 16.7 10 17.76 10 19a3 3 0 0 1-3 3a3 3 0 0 1-3-3c0-1.31.83-2.42 2-2.83V7.83A2.99 2.99 0 0 1 4 5a3 3 0 0 1 3-3a3 3 0 0 1 3 3c0 1.31-.83 2.42-2 2.83v5.29c.88-.65 2.16-1.12 4-1.12c2.67 0 3.56-1.34 3.85-2.23A3.006 3.006 0 0 1 14 7a3 3 0 0 1 3-3a3 3 0 0 1 3 3c0 1.34-.88 2.5-2.09 2.86C17.65 11.29 16.68 14 13 14m-6 4a1 1 0 0 0-1 1a1 1 0 0 0 1 1a1 1 0 0 0 1-1a1 1 0 0 0-1-1M7 4a1 1 0 0 0-1 1a1 1 0 0 0 1 1a1 1 0 0 0 1-1a1 1 0 0 0-1-1m10 2a1 1 0 0 0-1 1a1 1 0 0 0 1 1a1 1 0 0 0 1-1a1 1 0 0 0-1-1z" fill="currentColor"></path></svg>
|
344 |
+
main
|
345 |
+
<svg class="-mr-1 text-gray-500" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 24 24" style="transform: rotate(360deg);"><path d="M7 10l5 5l5-5z" fill="currentColor"></path></svg></button>
|
346 |
+
|
347 |
+
|
348 |
+
<div class="absolute top-full mt-1 min-w-full w-auto bg-white rounded-xl overflow-hidden shadow-lg z-10 border border-gray-100
|
349 |
+
left-0
|
350 |
+
v2-dropdown-menu hidden"><ul class="min-w-full w-auto">
|
351 |
+
<li><ul><li><a class="flex items-center hover:bg-gray-50 dark:hover:bg-gray-800 cursor-pointer px-3 py-1.5 whitespace-nowrap
|
352 |
+
|
353 |
+
|
354 |
+
v2-dropdown-entry" href="/flax-community/gpt2-medium-indonesian/blob/main/tokenizer_config.json">
|
355 |
+
|
356 |
+
main</a></li></ul></li>
|
357 |
+
<li><ul class="bg-yellow-50"></ul></li>
|
358 |
+
</ul></div>
|
359 |
+
</div>
|
360 |
+
<div class="flex items-center flex-wrap mb-2"><a class="hover:underline text-gray-800" href="/flax-community/gpt2-medium-indonesian/tree/main">gpt2-medium-indonesian</a>
|
361 |
+
<span class="text-gray-300 mx-1 font-light">/</span>
|
362 |
+
<span class="font-light dark:text-gray-300">tokenizer_config.json</span>
|
363 |
+
|
364 |
+
</div></div>
|
365 |
+
|
366 |
+
<div class="flex flex-row items-center mb-2">
|
367 |
+
</div></header>
|
368 |
+
<div class="border border-b-0 dark:border-gray-800 px-3 py-2 flex items-baseline rounded-t-lg bg-gradient-to-t from-gray-100-to-white"><img class="w-4 h-4 rounded-full mt-0.5 mr-2.5 self-center" alt="cahya's picture" src="https://aeiljuispo.cloudimg.io/v7/https://s3.amazonaws.com/moonup/production/uploads/1597134725486-5e4d607b37cb5b49818287c0.jpeg?w=200&h=200&f=face">
|
369 |
+
<div class="mr-5 truncate flex items-center flex-none"><a class="hover:underline" href="/cahya">cahya
|
370 |
+
</a>
|
371 |
+
|
372 |
+
</div>
|
373 |
+
<a class="mr-4 font-mono text-sm text-gray-500 truncate hover:underline" href="/flax-community/gpt2-medium-indonesian/commit/e7934b783a74f9bac5a7cb05ec98cda450f46f7e">add tokenizers files</a>
|
374 |
+
<a class="text-sm border dark:border-gray-800 px-1.5 rounded bg-gray-50 dark:bg-gray-900 hover:underline" href="/flax-community/gpt2-medium-indonesian/commit/e7934b783a74f9bac5a7cb05ec98cda450f46f7e">e7934b7</a>
|
375 |
+
|
376 |
+
|
377 |
+
<time class="ml-auto hidden lg:block text-gray-500 dark:text-gray-400 truncate flex-none pl-2" datetime="2021-07-10T05:40:02" title="Sat, 10 Jul 2021 05:40:02 GMT">5 months ago</time></div>
|
378 |
+
<div class="flex flex-wrap items-center justify-between px-3 py-1.5 border dark:border-gray-800 text-sm text-gray-800 dark:bg-gray-900"><div class="flex flex-wrap items-center"><a class="flex items-center hover:underline my-1 mr-4" href="/flax-community/gpt2-medium-indonesian/raw/main/tokenizer_config.json"><svg class="mr-1.5" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32" style="transform: rotate(360deg);"><path d="M31 16l-7 7l-1.41-1.41L28.17 16l-5.58-5.59L24 9l7 7z" fill="currentColor"></path><path d="M1 16l7-7l1.41 1.41L3.83 16l5.58 5.59L8 23l-7-7z" fill="currentColor"></path><path d="M12.419 25.484L17.639 6l1.932.518L14.35 26z" fill="currentColor"></path></svg>
|
379 |
+
raw
|
380 |
+
</a><a class="flex items-center hover:underline my-1 mr-4" href="/flax-community/gpt2-medium-indonesian/commits/main/tokenizer_config.json"><svg class="mr-1.5" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32" style="transform: rotate(360deg);"><path d="M16 4C9.383 4 4 9.383 4 16s5.383 12 12 12s12-5.383 12-12S22.617 4 16 4zm0 2c5.535 0 10 4.465 10 10s-4.465 10-10 10S6 21.535 6 16S10.465 6 16 6zm-1 2v9h7v-2h-5V8z" fill="currentColor"></path></svg>
|
381 |
+
history
|
382 |
+
</a><a class="flex items-center hover:underline my-1 mr-4" href="/flax-community/gpt2-medium-indonesian/blame/main/tokenizer_config.json"><svg class="mr-1.5" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32" style="transform: rotate(360deg);"><path d="M16 2a14 14 0 1 0 14 14A14 14 0 0 0 16 2zm0 26a12 12 0 1 1 12-12a12 12 0 0 1-12 12z" fill="currentColor"></path><path d="M11.5 11a2.5 2.5 0 1 0 2.5 2.5a2.48 2.48 0 0 0-2.5-2.5z" fill="currentColor"></path><path d="M20.5 11a2.5 2.5 0 1 0 2.5 2.5a2.48 2.48 0 0 0-2.5-2.5z" fill="currentColor"></path></svg>
|
383 |
+
blame
|
384 |
+
</a></div>
|
385 |
+
<div class="dark:text-gray-300">207 Bytes</div></div>
|
386 |
+
|
387 |
+
<div class="border border-t-0 rounded-b-lg dark:bg-gray-925 dark:border-gray-800 leading-tight"><div class="py-3"><div class="SVELTE_HYDRATER " data-props="{"lines":["{<span class=\\"hljs-attr\\">&quot;unk_token&quot;</span>: <span class=\\"hljs-string\\">&quot;&lt;|endoftext|&gt;&quot;</span>, <span class=\\"hljs-attr\\">&quot;bos_token&quot;</span>: <span class=\\"hljs-string\\">&quot;&lt;|endoftext|&gt;&quot;</span>, <span class=\\"hljs-attr\\">&quot;eos_token&quot;</span>: <span class=\\"hljs-string\\">&quot;&lt;|endoftext|&gt;&quot;</span>, <span class=\\"hljs-attr\\">&quot;add_prefix_space&quot;</span>: <span class=\\"hljs-literal\\">false</span>, <span class=\\"hljs-attr\\">&quot;special_tokens_map_file&quot;</span>: <span class=\\"hljs-literal\\">null</span>, <span class=\\"hljs-attr\\">&quot;name_or_path&quot;</span>: <span class=\\"hljs-string\\">&quot;.&quot;</span>, <span class=\\"hljs-attr\\">&quot;tokenizer_class&quot;</span>: <span class=\\"hljs-string\\">&quot;GPT2Tokenizer&quot;</span>}"]}" data-target="BlobContent"><div class="relative text-sm"><div class="overflow-x-auto"><table><tr class="" id="L1"><td class="text-right select-none pl-5 pr-3 cursor-pointer text-gray-300 hover:text-black"><pre>1</pre></td>
|
388 |
+
<td class="px-3 w-full"><pre>{<span class="hljs-attr">"unk_token"</span>: <span class="hljs-string">"<|endoftext|>"</span>, <span class="hljs-attr">"bos_token"</span>: <span class="hljs-string">"<|endoftext|>"</span>, <span class="hljs-attr">"eos_token"</span>: <span class="hljs-string">"<|endoftext|>"</span>, <span class="hljs-attr">"add_prefix_space"</span>: <span class="hljs-literal">false</span>, <span class="hljs-attr">"special_tokens_map_file"</span>: <span class="hljs-literal">null</span>, <span class="hljs-attr">"name_or_path"</span>: <span class="hljs-string">"."</span>, <span class="hljs-attr">"tokenizer_class"</span>: <span class="hljs-string">"GPT2Tokenizer"</span>}</pre></td>
|
389 |
+
</tr></table></div>
|
390 |
+
</div></div></div></div></section></div></main>
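A minimal sketch (not part of the commit, assuming the transformers library is installed) of how these fields are consumed: GPT2Tokenizer.from_pretrained() reads unk_token, bos_token, eos_token, and add_prefix_space from tokenizer_config.json when loading from the Hub repo or a local clone.

# Sketch: load the tokenizer and check the settings declared above.
from transformers import GPT2Tokenizer

# The repo id comes from the captured page; a local path to a clone of the
# repository would work the same way.
tokenizer = GPT2Tokenizer.from_pretrained("flax-community/gpt2-medium-indonesian")

# GPT-2 reuses a single special token for BOS, EOS, and UNK, as configured.
assert tokenizer.bos_token == tokenizer.eos_token == tokenizer.unk_token == "<|endoftext|>"

# add_prefix_space=false: a leading word is encoded without an implicit space,
# so "Sewindu" and " Sewindu" map to different token sequences. The prompt is
# the Indonesian widget example from the captured page.
print(tokenizer.tokenize("Sewindu sudah kita tak berjumpa"))

The same file is also read by AutoTokenizer, which dispatches on the "tokenizer_class" field ("GPT2Tokenizer") to instantiate the right class.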