sangmini committed on
Commit
c527d0a
·
1 Parent(s): e800c33

Upload . with huggingface_hub

Browse files
.gitattributes CHANGED
@@ -32,3 +32,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
32
  *.zip filter=lfs diff=lfs merge=lfs -text
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
32
  *.zip filter=lfs diff=lfs merge=lfs -text
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
35
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
36
+ unigram.json filter=lfs diff=lfs merge=lfs -text
1_Pooling/config.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "word_embedding_dimension": 384,
3
+ "pooling_mode_cls_token": false,
4
+ "pooling_mode_mean_tokens": true,
5
+ "pooling_mode_max_tokens": false,
6
+ "pooling_mode_mean_sqrt_len_tokens": false
7
+ }
2_Dense/config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"in_features": 384, "out_features": 1536, "bias": true, "activation_function": "torch.nn.modules.activation.Tanh"}
2_Dense/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:579e2cb38413bf2e5d742dc4e7e5ac8738a02bbf23d1fe5693ea55cb18e8584e
3
+ size 2366655
README.md ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ pipeline_tag: sentence-similarity
3
+ tags:
4
+ - sentence-transformers
5
+ - feature-extraction
6
+ - sentence-similarity
7
+
8
+ ---
9
+
10
+ # {MODEL_NAME}
11
+
12
+ This is a [sentence-transformers](https://www.SBERT.net) model: it maps sentences & paragraphs to a 1536-dimensional dense vector space and can be used for tasks like clustering or semantic search.
13
+
14
+ <!--- Describe your model here -->
15
+
16
+ ## Usage (Sentence-Transformers)
17
+
18
+ Using this model becomes easy when you have [sentence-transformers](https://www.SBERT.net) installed:
19
+
20
+ ```
21
+ pip install -U sentence-transformers
22
+ ```
23
+
24
+ Then you can use the model like this:
25
+
26
+ ```python
27
+ from sentence_transformers import SentenceTransformer
28
+ sentences = ["This is an example sentence", "Each sentence is converted"]
29
+
30
+ model = SentenceTransformer('{MODEL_NAME}')
31
+ embeddings = model.encode(sentences)
32
+ print(embeddings)
33
+ ```
34
+
35
+
36
+
37
+ ## Evaluation Results
38
+
39
+ <!--- Describe how your model was evaluated -->
40
+
41
+ For an automated evaluation of this model, see the *Sentence Embeddings Benchmark*: [https://seb.sbert.net](https://seb.sbert.net?model_name={MODEL_NAME})
42
+
43
+
44
+ ## Training
45
+ The model was trained with the parameters:
46
+
47
+ **DataLoader**:
48
+
49
+ `torch.utils.data.dataloader.DataLoader` of length 5629 with parameters:
50
+ ```
51
+ {'batch_size': 256, 'sampler': 'torch.utils.data.sampler.RandomSampler', 'batch_sampler': 'torch.utils.data.sampler.BatchSampler'}
52
+ ```
53
+
54
+ **Loss**:
55
+
56
+ `sentence_transformers.losses.MSELoss.MSELoss`
57
+
58
+ Parameters of the `fit()` method:
59
+ ```
60
+ {
61
+ "epochs": 10,
62
+ "evaluation_steps": 5000,
63
+ "evaluator": "sentence_transformers.evaluation.SequentialEvaluator.SequentialEvaluator",
64
+ "max_grad_norm": 1,
65
+ "optimizer_class": "<class 'torch.optim.adamw.AdamW'>",
66
+ "optimizer_params": {
67
+ "lr": 1e-05
68
+ },
69
+ "scheduler": "WarmupLinear",
70
+ "steps_per_epoch": null,
71
+ "warmup_steps": 0,
72
+ "weight_decay": 0.01
73
+ }
74
+ ```
75
+
76
+
77
+ ## Full Model Architecture
78
+ ```
79
+ SentenceTransformer(
80
+ (0): Transformer({'max_seq_length': 128, 'do_lower_case': False}) with Transformer model: BertModel
81
+ (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False})
82
+ (2): Dense({'in_features': 384, 'out_features': 1536, 'bias': True, 'activation_function': 'torch.nn.modules.activation.Tanh'})
83
+ )
84
+ ```
85
+
86
+ ## Citing & Authors
87
+
88
+ <!--- Describe where people can find more information -->
config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
3
+ "architectures": [
4
+ "BertModel"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_dropout": null,
8
+ "gradient_checkpointing": false,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 384,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 1536,
14
+ "layer_norm_eps": 1e-12,
15
+ "max_position_embeddings": 512,
16
+ "model_type": "bert",
17
+ "num_attention_heads": 12,
18
+ "num_hidden_layers": 12,
19
+ "pad_token_id": 0,
20
+ "position_embedding_type": "absolute",
21
+ "torch_dtype": "float32",
22
+ "transformers_version": "4.26.1",
23
+ "type_vocab_size": 2,
24
+ "use_cache": true,
25
+ "vocab_size": 250037
26
+ }
config_sentence_transformers.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "__version__": {
3
+ "sentence_transformers": "2.2.2",
4
+ "transformers": "4.26.1",
5
+ "pytorch": "1.13.1+cu116"
6
+ }
7
+ }
eval/mse_evaluation_TED2020-en-ja-dev.tsv.gz_results.csv ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ epoch,steps,MSE
2
+ 0,5000,15.336495637893677
3
+ 0,5000,12.954607605934143
4
+ 0,-1,12.912595272064209
5
+ 1,5000,12.687340378761292
6
+ 1,-1,12.669636309146881
7
+ 2,5000,12.557819485664368
8
+ 2,-1,12.54439651966095
9
+ 3,5000,12.48689889907837
10
+ 3,-1,12.482773512601852
11
+ 4,5000,12.444017827510834
12
+ 4,-1,12.438619136810303
13
+ 5,5000,12.410631030797958
14
+ 5,-1,12.410634011030197
15
+ 6,5000,12.390589714050293
16
+ 6,-1,12.387491017580032
17
+ 7,5000,12.376674264669418
18
+ 7,-1,12.375471740961075
19
+ 8,5000,12.36639991402626
20
+ 8,-1,12.365838885307312
21
+ 9,5000,12.362902611494064
22
+ 9,-1,12.362852692604065
eval/mse_evaluation_TED2020-en-ko-dev.tsv.gz_results.csv ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ epoch,steps,MSE
2
+ 0,5000,15.262071788311005
3
+ 0,5000,12.939640879631042
4
+ 0,-1,12.899290025234222
5
+ 1,5000,12.65731006860733
6
+ 1,-1,12.63882964849472
7
+ 2,5000,12.517587840557098
8
+ 2,-1,12.504678964614868
9
+ 3,5000,12.440747022628784
10
+ 3,-1,12.435487657785416
11
+ 4,5000,12.395995855331421
12
+ 4,-1,12.389052659273148
13
+ 5,5000,12.358856946229935
14
+ 5,-1,12.358789891004562
15
+ 6,5000,12.337474524974823
16
+ 6,-1,12.334243953227997
17
+ 7,5000,12.321463972330093
18
+ 7,-1,12.321139872074127
19
+ 8,5000,12.31139600276947
20
+ 8,-1,12.310890853404999
21
+ 9,5000,12.307607382535934
22
+ 9,-1,12.307436019182205
eval/similarity_evaluation_STS.en-en.txt_results.csv ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ epoch,steps,cosine_pearson,cosine_spearman,euclidean_pearson,euclidean_spearman,manhattan_pearson,manhattan_spearman,dot_pearson,dot_spearman
2
+ 0,5000,0.7655745634242543,0.7836504132433898,0.7846846467902374,0.7838618313464806,0.7836273750089683,0.7822623572974621,0.5494118147277136,0.541470976153046
3
+ 0,5000,0.5433990711978052,0.6367006089229957,0.6259777625370179,0.642350085034129,0.6278392957870316,0.6448336711506177,0.17866541866368144,0.17027075867749752
4
+ 0,-1,0.5514187157185174,0.6441905757206711,0.6343457732981078,0.651206197174321,0.6356036005712372,0.6526211608788242,0.17619759098086993,0.1669272774762566
5
+ 1,5000,0.6259974781249414,0.7028448766559378,0.699061169438624,0.7103110108674464,0.7003582184299956,0.7107961193149017,0.3107339912723675,0.28766815615132896
6
+ 1,-1,0.6320028753018896,0.7069686828558587,0.7018952028092444,0.7151782399971442,0.7032080521125087,0.7145897288774501,0.325787489252297,0.30655278991248835
7
+ 2,5000,0.6532984908864011,0.7200200989544714,0.7167047871038497,0.7264994872193732,0.7180204407529439,0.7284187791988854,0.3939642284126825,0.37640994393225324
8
+ 2,-1,0.6595646756177629,0.7207808197290466,0.7207058362824752,0.7303038598853518,0.7219193242676952,0.7325018393643925,0.3926348211295626,0.37538091436502835
9
+ 3,5000,0.6691316041398592,0.7268093108395388,0.7271152475614135,0.7360536634963155,0.727627390318982,0.7374436414249986,0.42470505212920584,0.40564253284832236
10
+ 3,-1,0.6740698957264698,0.7324541741920593,0.7309594678451006,0.7400790641791616,0.7312940067366801,0.7411177036601633,0.4340591834670731,0.41405236059271755
11
+ 4,5000,0.6807875710274601,0.7361097853927722,0.7350881349038876,0.7451135058085766,0.7345268271030532,0.7434967339147597,0.4468829533734607,0.42710300789822797
12
+ 4,-1,0.6801609860450414,0.7378207344416022,0.7356695044433241,0.7460164533070492,0.734915697335349,0.7453107012392775,0.4471632459627408,0.4222465418719602
13
+ 5,5000,0.687594182855673,0.746239018910121,0.7412780017392339,0.7525212117443221,0.740047690811974,0.752489691227134,0.4545440684939544,0.42849029505105385
14
+ 5,-1,0.6870067772718528,0.7474283418391438,0.7411894808586456,0.7520572451071759,0.739837237251545,0.751876578728171,0.4601486601510168,0.4344857280581537
15
+ 6,5000,0.6897349652251179,0.7495851908872195,0.7434885283062591,0.7556036876873837,0.7420581753647143,0.7548368165679913,0.460962258383408,0.4365022723650874
16
+ 6,-1,0.6897381062614594,0.7491638922672424,0.7430627618238766,0.7541691197587759,0.7414341614238988,0.7540784021727224,0.4631013001864373,0.4377907696042872
17
+ 7,5000,0.6949463226109924,0.7541103070864615,0.7473258964764949,0.7592827470777122,0.7457521688697132,0.7582387260449954,0.47035650643256577,0.44594920200428007
18
+ 7,-1,0.6945277705869185,0.7537605062249844,0.7467006246144783,0.758808786130238,0.7451742148625806,0.7576640532011399,0.4746718702140449,0.45128116656422634
19
+ 8,5000,0.6970970176332015,0.7548437357059107,0.7485574923693642,0.7597217279390386,0.746960239958548,0.7599858083696264,0.48098707129883783,0.4573730831056457
20
+ 8,-1,0.6976548965751576,0.7553219250154466,0.7488958919812253,0.7609679415576204,0.7472972687653574,0.7599277644904142,0.48139796322076295,0.4568818243133732
21
+ 9,5000,0.6975963820651174,0.7558831439800144,0.7490601641362845,0.7604932118170439,0.7474640856660989,0.7607053987132367,0.48040339603301346,0.45827410862136303
22
+ 9,-1,0.6977032578721013,0.755818565359434,0.7491281424746049,0.7603794304379261,0.7475372403588282,0.7603940375068668,0.4806896585543663,0.4585566400864023
eval/translation_evaluation_TED2020-en-ja-dev.tsv.gz_results.csv ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ epoch,steps,src2trg,trg2src
2
+ 0,5000,0.914,0.896
3
+ 0,5000,0.649,0.613
4
+ 0,-1,0.687,0.67
5
+ 1,5000,0.821,0.793
6
+ 1,-1,0.826,0.797
7
+ 2,5000,0.842,0.825
8
+ 2,-1,0.846,0.823
9
+ 3,5000,0.858,0.837
10
+ 3,-1,0.858,0.839
11
+ 4,5000,0.868,0.851
12
+ 4,-1,0.869,0.855
13
+ 5,5000,0.869,0.858
14
+ 5,-1,0.872,0.856
15
+ 6,5000,0.873,0.852
16
+ 6,-1,0.873,0.854
17
+ 7,5000,0.877,0.854
18
+ 7,-1,0.877,0.855
19
+ 8,5000,0.879,0.856
20
+ 8,-1,0.879,0.852
21
+ 9,5000,0.88,0.854
22
+ 9,-1,0.879,0.854
eval/translation_evaluation_TED2020-en-ko-dev.tsv.gz_results.csv ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ epoch,steps,src2trg,trg2src
2
+ 0,5000,0.949,0.93
3
+ 0,5000,0.719,0.713
4
+ 0,-1,0.751,0.736
5
+ 1,5000,0.872,0.84
6
+ 1,-1,0.877,0.847
7
+ 2,5000,0.899,0.885
8
+ 2,-1,0.904,0.887
9
+ 3,5000,0.911,0.898
10
+ 3,-1,0.913,0.9
11
+ 4,5000,0.922,0.901
12
+ 4,-1,0.918,0.907
13
+ 5,5000,0.921,0.911
14
+ 5,-1,0.92,0.908
15
+ 6,5000,0.923,0.911
16
+ 6,-1,0.926,0.912
17
+ 7,5000,0.93,0.914
18
+ 7,-1,0.929,0.914
19
+ 8,5000,0.927,0.916
20
+ 8,-1,0.929,0.915
21
+ 9,5000,0.929,0.915
22
+ 9,-1,0.929,0.915
modules.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "idx": 0,
4
+ "name": "0",
5
+ "path": "",
6
+ "type": "sentence_transformers.models.Transformer"
7
+ },
8
+ {
9
+ "idx": 1,
10
+ "name": "1",
11
+ "path": "1_Pooling",
12
+ "type": "sentence_transformers.models.Pooling"
13
+ },
14
+ {
15
+ "idx": 2,
16
+ "name": "2",
17
+ "path": "2_Dense",
18
+ "type": "sentence_transformers.models.Dense"
19
+ }
20
+ ]
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:accc8fd2a364dded93a81ad811808e91a32f8fd9a3cd889f89488bdda2603405
3
+ size 470686253
sentence_bert_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "max_seq_length": 128,
3
+ "do_lower_case": false
4
+ }
special_tokens_map.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<s>",
3
+ "cls_token": "<s>",
4
+ "eos_token": "</s>",
5
+ "mask_token": {
6
+ "content": "<mask>",
7
+ "lstrip": true,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false
11
+ },
12
+ "pad_token": "<pad>",
13
+ "sep_token": "</s>",
14
+ "unk_token": "<unk>"
15
+ }
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b60b6b43406a48bf3638526314f3d232d97058bc93472ff2de930d43686fa441
3
+ size 17082913
tokenizer_config.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<s>",
3
+ "cls_token": "<s>",
4
+ "do_lower_case": true,
5
+ "eos_token": "</s>",
6
+ "mask_token": {
7
+ "__type": "AddedToken",
8
+ "content": "<mask>",
9
+ "lstrip": true,
10
+ "normalized": true,
11
+ "rstrip": false,
12
+ "single_word": false
13
+ },
14
+ "model_max_length": 512,
15
+ "name_or_path": "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
16
+ "pad_token": "<pad>",
17
+ "sep_token": "</s>",
18
+ "special_tokens_map_file": null,
19
+ "strip_accents": null,
20
+ "tokenize_chinese_chars": true,
21
+ "tokenizer_class": "BertTokenizer",
22
+ "unk_token": "<unk>"
23
+ }
unigram.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:71b44701d7efd054205115acfa6ef126c5d2f84bd3affe0c59e48163674d19a6
3
+ size 14763234