tzmtwtr committed on
Commit
96a2c60
•
1 Parent(s): 94068b8

to be pure model

Browse files
{model/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/1_Pooling → 1_Pooling}/config.json RENAMED
File without changes
License.txt DELETED
@@ -1,21 +0,0 @@
1
- MIT License
2
-
3
- Copyright © 2020 nnnmu24
4
-
5
- Permission is hereby granted, free of charge, to any person obtaining a copy
6
- of this software and associated documentation files (the "Software"), to deal
7
- in the Software without restriction, including without limitation the rights
8
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
- copies of the Software, and to permit persons to whom the Software is
10
- furnished to do so, subject to the following conditions:
11
-
12
- The above copyright notice and this permission notice shall be included in all
13
- copies or substantial portions of the Software.
14
-
15
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
- SOFTWARE.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
README.md DELETED
@@ -1,6 +0,0 @@
1
- # Model:
2
- converted from the below model.
3
- https://huggingface.co/sentence-transformers/paraphrase-multilingual-mpnet-base-v2
4
-
5
- # License
6
- This project is under the MIT License except for the Model.
 
 
 
 
 
 
 
model/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/config.json → config.json RENAMED
File without changes
model/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/config_sentence_transformers.json → config_sentence_transformers.json RENAMED
File without changes
index.d.ts DELETED
@@ -1,3 +0,0 @@
1
- declare module 'embedding';
2
- declare async function embedding_calc(text: string): Promise<number[]>;
3
- export { embedding_calc }
 
 
 
 
index.js DELETED
@@ -1,17 +0,0 @@
1
- import path from 'path';
2
- import { fileURLToPath } from 'url';
3
- import { env, pipeline } from "@xenova/transformers";
4
-
5
- const __filename = fileURLToPath(import.meta.url);
6
- const __dirname = path.dirname(__filename);
7
- env.localModelPath = __dirname + '/model'
8
-
9
- let pipe = null;
10
- const piping = pipeline("feature-extraction", "sentence-transformers/paraphrase-multilingual-mpnet-base-v2", { local_files_only: true, quantized: true }).then(p => { pipe = p; })
11
-
12
- export async function embedding_calc(text) {
13
- await piping;
14
- if (pipe) {
15
- return [...(await pipe(text)).data.values()];
16
- }
17
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
model/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/README.md DELETED
@@ -1,163 +0,0 @@
1
- ---
2
- language:
3
- - multilingual
4
- - ar
5
- - bg
6
- - ca
7
- - cs
8
- - da
9
- - de
10
- - el
11
- - en
12
- - es
13
- - et
14
- - fa
15
- - fi
16
- - fr
17
- - gl
18
- - gu
19
- - he
20
- - hi
21
- - hr
22
- - hu
23
- - hy
24
- - id
25
- - it
26
- - ja
27
- - ka
28
- - ko
29
- - ku
30
- - lt
31
- - lv
32
- - mk
33
- - mn
34
- - mr
35
- - ms
36
- - my
37
- - nb
38
- - nl
39
- - pl
40
- - pt
41
- - ro
42
- - ru
43
- - sk
44
- - sl
45
- - sq
46
- - sr
47
- - sv
48
- - th
49
- - tr
50
- - uk
51
- - ur
52
- - vi
53
- language_bcp47:
54
- - fr-ca
55
- - pt-br
56
- - zh-cn
57
- - zh-tw
58
- pipeline_tag: sentence-similarity
59
- license: apache-2.0
60
- tags:
61
- - sentence-transformers
62
- - feature-extraction
63
- - sentence-similarity
64
- - transformers
65
- ---
66
-
67
- # sentence-transformers/paraphrase-multilingual-mpnet-base-v2
68
-
69
- This is a [sentence-transformers](https://www.SBERT.net) model: It maps sentences & paragraphs to a 768 dimensional dense vector space and can be used for tasks like clustering or semantic search.
70
-
71
-
72
-
73
- ## Usage (Sentence-Transformers)
74
-
75
- Using this model becomes easy when you have [sentence-transformers](https://www.SBERT.net) installed:
76
-
77
- ```
78
- pip install -U sentence-transformers
79
- ```
80
-
81
- Then you can use the model like this:
82
-
83
- ```python
84
- from sentence_transformers import SentenceTransformer
85
- sentences = ["This is an example sentence", "Each sentence is converted"]
86
-
87
- model = SentenceTransformer('sentence-transformers/paraphrase-multilingual-mpnet-base-v2')
88
- embeddings = model.encode(sentences)
89
- print(embeddings)
90
- ```
91
-
92
-
93
-
94
- ## Usage (HuggingFace Transformers)
95
- Without [sentence-transformers](https://www.SBERT.net), you can use the model like this: First, you pass your input through the transformer model, then you have to apply the right pooling-operation on-top of the contextualized word embeddings.
96
-
97
- ```python
98
- from transformers import AutoTokenizer, AutoModel
99
- import torch
100
-
101
-
102
- #Mean Pooling - Take attention mask into account for correct averaging
103
- def mean_pooling(model_output, attention_mask):
104
- token_embeddings = model_output[0] #First element of model_output contains all token embeddings
105
- input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
106
- return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(input_mask_expanded.sum(1), min=1e-9)
107
-
108
-
109
- # Sentences we want sentence embeddings for
110
- sentences = ['This is an example sentence', 'Each sentence is converted']
111
-
112
- # Load model from HuggingFace Hub
113
- tokenizer = AutoTokenizer.from_pretrained('sentence-transformers/paraphrase-multilingual-mpnet-base-v2')
114
- model = AutoModel.from_pretrained('sentence-transformers/paraphrase-multilingual-mpnet-base-v2')
115
-
116
- # Tokenize sentences
117
- encoded_input = tokenizer(sentences, padding=True, truncation=True, return_tensors='pt')
118
-
119
- # Compute token embeddings
120
- with torch.no_grad():
121
- model_output = model(**encoded_input)
122
-
123
- # Perform pooling. In this case, average pooling
124
- sentence_embeddings = mean_pooling(model_output, encoded_input['attention_mask'])
125
-
126
- print("Sentence embeddings:")
127
- print(sentence_embeddings)
128
- ```
129
-
130
-
131
-
132
- ## Evaluation Results
133
-
134
-
135
-
136
- For an automated evaluation of this model, see the *Sentence Embeddings Benchmark*: [https://seb.sbert.net](https://seb.sbert.net?model_name=sentence-transformers/paraphrase-multilingual-mpnet-base-v2)
137
-
138
-
139
-
140
- ## Full Model Architecture
141
- ```
142
- SentenceTransformer(
143
- (0): Transformer({'max_seq_length': 128, 'do_lower_case': False}) with Transformer model: XLMRobertaModel
144
- (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False})
145
- )
146
- ```
147
-
148
- ## Citing & Authors
149
-
150
- This model was trained by [sentence-transformers](https://www.sbert.net/).
151
-
152
- If you find this model helpful, feel free to cite our publication [Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks](https://arxiv.org/abs/1908.10084):
153
- ```bibtex
154
- @inproceedings{reimers-2019-sentence-bert,
155
- title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
156
- author = "Reimers, Nils and Gurevych, Iryna",
157
- booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
158
- month = "11",
159
- year = "2019",
160
- publisher = "Association for Computational Linguistics",
161
- url = "http://arxiv.org/abs/1908.10084",
162
- }
163
- ```
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
model/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/modules.json → modules.json RENAMED
File without changes
{model/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/onnx → onnx}/model_quantized.onnx RENAMED
File without changes
model/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/sentence_bert_config.json → sentence_bert_config.json RENAMED
File without changes
model/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/special_tokens_map.json → special_tokens_map.json RENAMED
File without changes
model/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/tokenizer.json → tokenizer.json RENAMED
File without changes
model/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/tokenizer_config.json → tokenizer_config.json RENAMED
File without changes