Upload . with huggingface_hub
Browse files- .gitattributes +2 -0
- 1_Pooling/config.json +7 -0
- 2_Dense/config.json +1 -0
- 2_Dense/pytorch_model.bin +3 -0
- README.md +88 -0
- config.json +26 -0
- config_sentence_transformers.json +7 -0
- eval/mse_evaluation_TED2020-en-ja-dev.tsv.gz_results.csv +22 -0
- eval/mse_evaluation_TED2020-en-ko-dev.tsv.gz_results.csv +22 -0
- eval/similarity_evaluation_STS.en-en.txt_results.csv +22 -0
- eval/translation_evaluation_TED2020-en-ja-dev.tsv.gz_results.csv +22 -0
- eval/translation_evaluation_TED2020-en-ko-dev.tsv.gz_results.csv +22 -0
- modules.json +20 -0
- pytorch_model.bin +3 -0
- sentence_bert_config.json +4 -0
- special_tokens_map.json +15 -0
- tokenizer.json +3 -0
- tokenizer_config.json +23 -0
- unigram.json +3 -0
.gitattributes
CHANGED
@@ -32,3 +32,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
32 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
33 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
34 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
32 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
33 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
34 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
35 |
+
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
36 |
+
unigram.json filter=lfs diff=lfs merge=lfs -text
|
1_Pooling/config.json
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"word_embedding_dimension": 384,
|
3 |
+
"pooling_mode_cls_token": false,
|
4 |
+
"pooling_mode_mean_tokens": true,
|
5 |
+
"pooling_mode_max_tokens": false,
|
6 |
+
"pooling_mode_mean_sqrt_len_tokens": false
|
7 |
+
}
|
2_Dense/config.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"in_features": 384, "out_features": 1536, "bias": true, "activation_function": "torch.nn.modules.activation.Tanh"}
|
2_Dense/pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:579e2cb38413bf2e5d742dc4e7e5ac8738a02bbf23d1fe5693ea55cb18e8584e
|
3 |
+
size 2366655
|
README.md
ADDED
@@ -0,0 +1,88 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
pipeline_tag: sentence-similarity
|
3 |
+
tags:
|
4 |
+
- sentence-transformers
|
5 |
+
- feature-extraction
|
6 |
+
- sentence-similarity
|
7 |
+
|
8 |
+
---
|
9 |
+
|
10 |
+
# {MODEL_NAME}
|
11 |
+
|
12 |
+
This is a [sentence-transformers](https://www.SBERT.net) model: it maps sentences and paragraphs to a 1536-dimensional dense vector space and can be used for tasks like clustering or semantic search.
|
13 |
+
|
14 |
+
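The model starts from `sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2` (384-dimensional, mean pooling) and adds a Dense layer (384 → 1536, Tanh activation). It was trained with an MSE loss and evaluated during training on the TED2020 en-ja and en-ko dev sets and on STS en-en.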
|
15 |
+
|
16 |
+
## Usage (Sentence-Transformers)
|
17 |
+
|
18 |
+
Using this model becomes easy when you have [sentence-transformers](https://www.SBERT.net) installed:
|
19 |
+
|
20 |
+
```
|
21 |
+
pip install -U sentence-transformers
|
22 |
+
```
|
23 |
+
|
24 |
+
Then you can use the model like this:
|
25 |
+
|
26 |
+
```python
|
27 |
+
from sentence_transformers import SentenceTransformer
|
28 |
+
sentences = ["This is an example sentence", "Each sentence is converted"]
|
29 |
+
|
30 |
+
model = SentenceTransformer('{MODEL_NAME}')
|
31 |
+
embeddings = model.encode(sentences)
|
32 |
+
print(embeddings)
|
33 |
+
```
|
34 |
+
|
35 |
+
|
36 |
+
|
37 |
+
## Evaluation Results
|
38 |
+
|
39 |
+
<!--- Describe how your model was evaluated -->
|
40 |
+
|
41 |
+
For an automated evaluation of this model, see the *Sentence Embeddings Benchmark*: [https://seb.sbert.net](https://seb.sbert.net?model_name={MODEL_NAME})
|
42 |
+
|
43 |
+
|
44 |
+
## Training
|
45 |
+
The model was trained with the following parameters:
|
46 |
+
|
47 |
+
**DataLoader**:
|
48 |
+
|
49 |
+
`torch.utils.data.dataloader.DataLoader` of length 5629 with parameters:
|
50 |
+
```
|
51 |
+
{'batch_size': 256, 'sampler': 'torch.utils.data.sampler.RandomSampler', 'batch_sampler': 'torch.utils.data.sampler.BatchSampler'}
|
52 |
+
```
|
53 |
+
|
54 |
+
**Loss**:
|
55 |
+
|
56 |
+
`sentence_transformers.losses.MSELoss.MSELoss`
|
57 |
+
|
58 |
+
Parameters of the `fit()` method:
|
59 |
+
```
|
60 |
+
{
|
61 |
+
"epochs": 10,
|
62 |
+
"evaluation_steps": 5000,
|
63 |
+
"evaluator": "sentence_transformers.evaluation.SequentialEvaluator.SequentialEvaluator",
|
64 |
+
"max_grad_norm": 1,
|
65 |
+
"optimizer_class": "<class 'torch.optim.adamw.AdamW'>",
|
66 |
+
"optimizer_params": {
|
67 |
+
"lr": 1e-05
|
68 |
+
},
|
69 |
+
"scheduler": "WarmupLinear",
|
70 |
+
"steps_per_epoch": null,
|
71 |
+
"warmup_steps": 0,
|
72 |
+
"weight_decay": 0.01
|
73 |
+
}
|
74 |
+
```
|
75 |
+
|
76 |
+
|
77 |
+
## Full Model Architecture
|
78 |
+
```
|
79 |
+
SentenceTransformer(
|
80 |
+
(0): Transformer({'max_seq_length': 128, 'do_lower_case': False}) with Transformer model: BertModel
|
81 |
+
(1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False})
|
82 |
+
(2): Dense({'in_features': 384, 'out_features': 1536, 'bias': True, 'activation_function': 'torch.nn.modules.activation.Tanh'})
|
83 |
+
)
|
84 |
+
```
|
85 |
+
|
86 |
+
## Citing & Authors
|
87 |
+
|
88 |
+
<!--- Describe where people can find more information -->
|
config.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
|
3 |
+
"architectures": [
|
4 |
+
"BertModel"
|
5 |
+
],
|
6 |
+
"attention_probs_dropout_prob": 0.1,
|
7 |
+
"classifier_dropout": null,
|
8 |
+
"gradient_checkpointing": false,
|
9 |
+
"hidden_act": "gelu",
|
10 |
+
"hidden_dropout_prob": 0.1,
|
11 |
+
"hidden_size": 384,
|
12 |
+
"initializer_range": 0.02,
|
13 |
+
"intermediate_size": 1536,
|
14 |
+
"layer_norm_eps": 1e-12,
|
15 |
+
"max_position_embeddings": 512,
|
16 |
+
"model_type": "bert",
|
17 |
+
"num_attention_heads": 12,
|
18 |
+
"num_hidden_layers": 12,
|
19 |
+
"pad_token_id": 0,
|
20 |
+
"position_embedding_type": "absolute",
|
21 |
+
"torch_dtype": "float32",
|
22 |
+
"transformers_version": "4.26.1",
|
23 |
+
"type_vocab_size": 2,
|
24 |
+
"use_cache": true,
|
25 |
+
"vocab_size": 250037
|
26 |
+
}
|
config_sentence_transformers.json
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"__version__": {
|
3 |
+
"sentence_transformers": "2.2.2",
|
4 |
+
"transformers": "4.26.1",
|
5 |
+
"pytorch": "1.13.1+cu116"
|
6 |
+
}
|
7 |
+
}
|
eval/mse_evaluation_TED2020-en-ja-dev.tsv.gz_results.csv
ADDED
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
epoch,steps,MSE
|
2 |
+
0,5000,15.336495637893677
|
3 |
+
0,5000,12.954607605934143
|
4 |
+
0,-1,12.912595272064209
|
5 |
+
1,5000,12.687340378761292
|
6 |
+
1,-1,12.669636309146881
|
7 |
+
2,5000,12.557819485664368
|
8 |
+
2,-1,12.54439651966095
|
9 |
+
3,5000,12.48689889907837
|
10 |
+
3,-1,12.482773512601852
|
11 |
+
4,5000,12.444017827510834
|
12 |
+
4,-1,12.438619136810303
|
13 |
+
5,5000,12.410631030797958
|
14 |
+
5,-1,12.410634011030197
|
15 |
+
6,5000,12.390589714050293
|
16 |
+
6,-1,12.387491017580032
|
17 |
+
7,5000,12.376674264669418
|
18 |
+
7,-1,12.375471740961075
|
19 |
+
8,5000,12.36639991402626
|
20 |
+
8,-1,12.365838885307312
|
21 |
+
9,5000,12.362902611494064
|
22 |
+
9,-1,12.362852692604065
|
eval/mse_evaluation_TED2020-en-ko-dev.tsv.gz_results.csv
ADDED
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
epoch,steps,MSE
|
2 |
+
0,5000,15.262071788311005
|
3 |
+
0,5000,12.939640879631042
|
4 |
+
0,-1,12.899290025234222
|
5 |
+
1,5000,12.65731006860733
|
6 |
+
1,-1,12.63882964849472
|
7 |
+
2,5000,12.517587840557098
|
8 |
+
2,-1,12.504678964614868
|
9 |
+
3,5000,12.440747022628784
|
10 |
+
3,-1,12.435487657785416
|
11 |
+
4,5000,12.395995855331421
|
12 |
+
4,-1,12.389052659273148
|
13 |
+
5,5000,12.358856946229935
|
14 |
+
5,-1,12.358789891004562
|
15 |
+
6,5000,12.337474524974823
|
16 |
+
6,-1,12.334243953227997
|
17 |
+
7,5000,12.321463972330093
|
18 |
+
7,-1,12.321139872074127
|
19 |
+
8,5000,12.31139600276947
|
20 |
+
8,-1,12.310890853404999
|
21 |
+
9,5000,12.307607382535934
|
22 |
+
9,-1,12.307436019182205
|
eval/similarity_evaluation_STS.en-en.txt_results.csv
ADDED
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
epoch,steps,cosine_pearson,cosine_spearman,euclidean_pearson,euclidean_spearman,manhattan_pearson,manhattan_spearman,dot_pearson,dot_spearman
|
2 |
+
0,5000,0.7655745634242543,0.7836504132433898,0.7846846467902374,0.7838618313464806,0.7836273750089683,0.7822623572974621,0.5494118147277136,0.541470976153046
|
3 |
+
0,5000,0.5433990711978052,0.6367006089229957,0.6259777625370179,0.642350085034129,0.6278392957870316,0.6448336711506177,0.17866541866368144,0.17027075867749752
|
4 |
+
0,-1,0.5514187157185174,0.6441905757206711,0.6343457732981078,0.651206197174321,0.6356036005712372,0.6526211608788242,0.17619759098086993,0.1669272774762566
|
5 |
+
1,5000,0.6259974781249414,0.7028448766559378,0.699061169438624,0.7103110108674464,0.7003582184299956,0.7107961193149017,0.3107339912723675,0.28766815615132896
|
6 |
+
1,-1,0.6320028753018896,0.7069686828558587,0.7018952028092444,0.7151782399971442,0.7032080521125087,0.7145897288774501,0.325787489252297,0.30655278991248835
|
7 |
+
2,5000,0.6532984908864011,0.7200200989544714,0.7167047871038497,0.7264994872193732,0.7180204407529439,0.7284187791988854,0.3939642284126825,0.37640994393225324
|
8 |
+
2,-1,0.6595646756177629,0.7207808197290466,0.7207058362824752,0.7303038598853518,0.7219193242676952,0.7325018393643925,0.3926348211295626,0.37538091436502835
|
9 |
+
3,5000,0.6691316041398592,0.7268093108395388,0.7271152475614135,0.7360536634963155,0.727627390318982,0.7374436414249986,0.42470505212920584,0.40564253284832236
|
10 |
+
3,-1,0.6740698957264698,0.7324541741920593,0.7309594678451006,0.7400790641791616,0.7312940067366801,0.7411177036601633,0.4340591834670731,0.41405236059271755
|
11 |
+
4,5000,0.6807875710274601,0.7361097853927722,0.7350881349038876,0.7451135058085766,0.7345268271030532,0.7434967339147597,0.4468829533734607,0.42710300789822797
|
12 |
+
4,-1,0.6801609860450414,0.7378207344416022,0.7356695044433241,0.7460164533070492,0.734915697335349,0.7453107012392775,0.4471632459627408,0.4222465418719602
|
13 |
+
5,5000,0.687594182855673,0.746239018910121,0.7412780017392339,0.7525212117443221,0.740047690811974,0.752489691227134,0.4545440684939544,0.42849029505105385
|
14 |
+
5,-1,0.6870067772718528,0.7474283418391438,0.7411894808586456,0.7520572451071759,0.739837237251545,0.751876578728171,0.4601486601510168,0.4344857280581537
|
15 |
+
6,5000,0.6897349652251179,0.7495851908872195,0.7434885283062591,0.7556036876873837,0.7420581753647143,0.7548368165679913,0.460962258383408,0.4365022723650874
|
16 |
+
6,-1,0.6897381062614594,0.7491638922672424,0.7430627618238766,0.7541691197587759,0.7414341614238988,0.7540784021727224,0.4631013001864373,0.4377907696042872
|
17 |
+
7,5000,0.6949463226109924,0.7541103070864615,0.7473258964764949,0.7592827470777122,0.7457521688697132,0.7582387260449954,0.47035650643256577,0.44594920200428007
|
18 |
+
7,-1,0.6945277705869185,0.7537605062249844,0.7467006246144783,0.758808786130238,0.7451742148625806,0.7576640532011399,0.4746718702140449,0.45128116656422634
|
19 |
+
8,5000,0.6970970176332015,0.7548437357059107,0.7485574923693642,0.7597217279390386,0.746960239958548,0.7599858083696264,0.48098707129883783,0.4573730831056457
|
20 |
+
8,-1,0.6976548965751576,0.7553219250154466,0.7488958919812253,0.7609679415576204,0.7472972687653574,0.7599277644904142,0.48139796322076295,0.4568818243133732
|
21 |
+
9,5000,0.6975963820651174,0.7558831439800144,0.7490601641362845,0.7604932118170439,0.7474640856660989,0.7607053987132367,0.48040339603301346,0.45827410862136303
|
22 |
+
9,-1,0.6977032578721013,0.755818565359434,0.7491281424746049,0.7603794304379261,0.7475372403588282,0.7603940375068668,0.4806896585543663,0.4585566400864023
|
eval/translation_evaluation_TED2020-en-ja-dev.tsv.gz_results.csv
ADDED
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
epoch,steps,src2trg,trg2src
|
2 |
+
0,5000,0.914,0.896
|
3 |
+
0,5000,0.649,0.613
|
4 |
+
0,-1,0.687,0.67
|
5 |
+
1,5000,0.821,0.793
|
6 |
+
1,-1,0.826,0.797
|
7 |
+
2,5000,0.842,0.825
|
8 |
+
2,-1,0.846,0.823
|
9 |
+
3,5000,0.858,0.837
|
10 |
+
3,-1,0.858,0.839
|
11 |
+
4,5000,0.868,0.851
|
12 |
+
4,-1,0.869,0.855
|
13 |
+
5,5000,0.869,0.858
|
14 |
+
5,-1,0.872,0.856
|
15 |
+
6,5000,0.873,0.852
|
16 |
+
6,-1,0.873,0.854
|
17 |
+
7,5000,0.877,0.854
|
18 |
+
7,-1,0.877,0.855
|
19 |
+
8,5000,0.879,0.856
|
20 |
+
8,-1,0.879,0.852
|
21 |
+
9,5000,0.88,0.854
|
22 |
+
9,-1,0.879,0.854
|
eval/translation_evaluation_TED2020-en-ko-dev.tsv.gz_results.csv
ADDED
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
epoch,steps,src2trg,trg2src
|
2 |
+
0,5000,0.949,0.93
|
3 |
+
0,5000,0.719,0.713
|
4 |
+
0,-1,0.751,0.736
|
5 |
+
1,5000,0.872,0.84
|
6 |
+
1,-1,0.877,0.847
|
7 |
+
2,5000,0.899,0.885
|
8 |
+
2,-1,0.904,0.887
|
9 |
+
3,5000,0.911,0.898
|
10 |
+
3,-1,0.913,0.9
|
11 |
+
4,5000,0.922,0.901
|
12 |
+
4,-1,0.918,0.907
|
13 |
+
5,5000,0.921,0.911
|
14 |
+
5,-1,0.92,0.908
|
15 |
+
6,5000,0.923,0.911
|
16 |
+
6,-1,0.926,0.912
|
17 |
+
7,5000,0.93,0.914
|
18 |
+
7,-1,0.929,0.914
|
19 |
+
8,5000,0.927,0.916
|
20 |
+
8,-1,0.929,0.915
|
21 |
+
9,5000,0.929,0.915
|
22 |
+
9,-1,0.929,0.915
|
modules.json
ADDED
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[
|
2 |
+
{
|
3 |
+
"idx": 0,
|
4 |
+
"name": "0",
|
5 |
+
"path": "",
|
6 |
+
"type": "sentence_transformers.models.Transformer"
|
7 |
+
},
|
8 |
+
{
|
9 |
+
"idx": 1,
|
10 |
+
"name": "1",
|
11 |
+
"path": "1_Pooling",
|
12 |
+
"type": "sentence_transformers.models.Pooling"
|
13 |
+
},
|
14 |
+
{
|
15 |
+
"idx": 2,
|
16 |
+
"name": "2",
|
17 |
+
"path": "2_Dense",
|
18 |
+
"type": "sentence_transformers.models.Dense"
|
19 |
+
}
|
20 |
+
]
|
pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:accc8fd2a364dded93a81ad811808e91a32f8fd9a3cd889f89488bdda2603405
|
3 |
+
size 470686253
|
sentence_bert_config.json
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"max_seq_length": 128,
|
3 |
+
"do_lower_case": false
|
4 |
+
}
|
special_tokens_map.json
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"bos_token": "<s>",
|
3 |
+
"cls_token": "<s>",
|
4 |
+
"eos_token": "</s>",
|
5 |
+
"mask_token": {
|
6 |
+
"content": "<mask>",
|
7 |
+
"lstrip": true,
|
8 |
+
"normalized": false,
|
9 |
+
"rstrip": false,
|
10 |
+
"single_word": false
|
11 |
+
},
|
12 |
+
"pad_token": "<pad>",
|
13 |
+
"sep_token": "</s>",
|
14 |
+
"unk_token": "<unk>"
|
15 |
+
}
|
tokenizer.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b60b6b43406a48bf3638526314f3d232d97058bc93472ff2de930d43686fa441
|
3 |
+
size 17082913
|
tokenizer_config.json
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"bos_token": "<s>",
|
3 |
+
"cls_token": "<s>",
|
4 |
+
"do_lower_case": true,
|
5 |
+
"eos_token": "</s>",
|
6 |
+
"mask_token": {
|
7 |
+
"__type": "AddedToken",
|
8 |
+
"content": "<mask>",
|
9 |
+
"lstrip": true,
|
10 |
+
"normalized": true,
|
11 |
+
"rstrip": false,
|
12 |
+
"single_word": false
|
13 |
+
},
|
14 |
+
"model_max_length": 512,
|
15 |
+
"name_or_path": "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
|
16 |
+
"pad_token": "<pad>",
|
17 |
+
"sep_token": "</s>",
|
18 |
+
"special_tokens_map_file": null,
|
19 |
+
"strip_accents": null,
|
20 |
+
"tokenize_chinese_chars": true,
|
21 |
+
"tokenizer_class": "BertTokenizer",
|
22 |
+
"unk_token": "<unk>"
|
23 |
+
}
|
unigram.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:71b44701d7efd054205115acfa6ef126c5d2f84bd3affe0c59e48163674d19a6
|
3 |
+
size 14763234
|