Add new SentenceTransformer model
Browse files- 1_Pooling/config.json +10 -0
- README.md +709 -0
- config.json +26 -0
- config_sentence_transformers.json +10 -0
- model.safetensors +3 -0
- modules.json +14 -0
- sentence_bert_config.json +4 -0
- special_tokens_map.json +37 -0
- tokenizer.json +0 -0
- tokenizer_config.json +57 -0
- vocab.txt +0 -0
1_Pooling/config.json
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"word_embedding_dimension": 1024,
|
3 |
+
"pooling_mode_cls_token": false,
|
4 |
+
"pooling_mode_mean_tokens": true,
|
5 |
+
"pooling_mode_max_tokens": false,
|
6 |
+
"pooling_mode_mean_sqrt_len_tokens": false,
|
7 |
+
"pooling_mode_weightedmean_tokens": false,
|
8 |
+
"pooling_mode_lasttoken": false,
|
9 |
+
"include_prompt": true
|
10 |
+
}
|
README.md
ADDED
@@ -0,0 +1,709 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
base_model: mixedbread-ai/mxbai-embed-large-v1
|
3 |
+
library_name: sentence-transformers
|
4 |
+
pipeline_tag: sentence-similarity
|
5 |
+
tags:
|
6 |
+
- sentence-transformers
|
7 |
+
- sentence-similarity
|
8 |
+
- feature-extraction
|
9 |
+
- generated_from_trainer
|
10 |
+
- dataset_size:4488
|
11 |
+
- loss:MultipleNegativesRankingLoss
|
12 |
+
widget:
|
13 |
+
- source_sentence: 'Assess abnormalities of color vision, such as amblyopia.
|
14 |
+
|
15 |
+
Supervise or instruct ophthalmic staff.
|
16 |
+
|
17 |
+
Assist physicians in performing ophthalmic procedures, including surgery.
|
18 |
+
|
19 |
+
Collect ophthalmic measurements or other diagnostic information, using ultrasound
|
20 |
+
equipment, such as A-scan ultrasound biometry or B-scan ultrasonography equipment.
|
21 |
+
|
22 |
+
Call patients to inquire about their post-operative status or recovery.'
|
23 |
+
sentences:
|
24 |
+
- 'Operate diagnostic imaging equipment.
|
25 |
+
|
26 |
+
Sterilize medical equipment or instruments.
|
27 |
+
|
28 |
+
Measure the physical or physiological attributes of patients.
|
29 |
+
|
30 |
+
Diagnose medical conditions.
|
31 |
+
|
32 |
+
Record patient medical histories.
|
33 |
+
|
34 |
+
Supervise medical support personnel.
|
35 |
+
|
36 |
+
Instruct patients in the use of assistive equipment.
|
37 |
+
|
38 |
+
Administer non-intravenous medications.
|
39 |
+
|
40 |
+
Create advanced digital images of patients using computer imaging systems.
|
41 |
+
|
42 |
+
Operate diagnostic or therapeutic medical instruments or equipment.
|
43 |
+
|
44 |
+
Monitor patients following surgeries or other treatments.
|
45 |
+
|
46 |
+
Collect medical information from patients, family members, or other medical professionals.
|
47 |
+
|
48 |
+
Maintain medical equipment or instruments.
|
49 |
+
|
50 |
+
Clean medical equipment or facilities.
|
51 |
+
|
52 |
+
Communicate detailed medical information to patients or family members.
|
53 |
+
|
54 |
+
Calculate numerical data for medical activities.'
|
55 |
+
- 'Draw detailed or technical illustrations.
|
56 |
+
|
57 |
+
Train others on work processes.
|
58 |
+
|
59 |
+
Confer with clients to determine needs.
|
60 |
+
|
61 |
+
Coordinate artistic activities.
|
62 |
+
|
63 |
+
Negotiate for services.
|
64 |
+
|
65 |
+
Coordinate design activities.
|
66 |
+
|
67 |
+
Collaborate with others to develop or refine designs.'
|
68 |
+
- 'Verify mathematical calculations.
|
69 |
+
|
70 |
+
Supervise engineering or other technical personnel.
|
71 |
+
|
72 |
+
Create images or other visual displays.'
|
73 |
+
- source_sentence: 'Recruit, interview, and select employees.
|
74 |
+
|
75 |
+
Plan layouts of stockrooms, warehouses, or other storage areas, considering turnover,
|
76 |
+
size, weight, or related factors pertaining to items stored.
|
77 |
+
|
78 |
+
Coordinate activities with other supervisory personnel or with other work units
|
79 |
+
or departments.
|
80 |
+
|
81 |
+
Research, compile, and prepare reports, manuals, correspondence, or other information
|
82 |
+
required by management or governmental agencies.
|
83 |
+
|
84 |
+
Discuss job performance problems with employees to identify causes and issues
|
85 |
+
and to work on resolving problems.'
|
86 |
+
sentences:
|
87 |
+
- 'Guide class discussions.
|
88 |
+
|
89 |
+
Perform student enrollment or registration activities.
|
90 |
+
|
91 |
+
Write articles, books or other original materials in area of expertise.
|
92 |
+
|
93 |
+
Evaluate effectiveness of educational programs.
|
94 |
+
|
95 |
+
Stay informed about current developments in field of specialization.
|
96 |
+
|
97 |
+
Plan community programs or activities for the general public.
|
98 |
+
|
99 |
+
Advise students on academic or career matters.
|
100 |
+
|
101 |
+
Develop instructional objectives.
|
102 |
+
|
103 |
+
Teach physical science or mathematics courses at the college level.
|
104 |
+
|
105 |
+
Attend training sessions or professional meetings to develop or maintain professional
|
106 |
+
knowledge.
|
107 |
+
|
108 |
+
Compile specialized bibliographies or lists of materials.
|
109 |
+
|
110 |
+
Research topics in area of expertise.
|
111 |
+
|
112 |
+
Evaluate student work.
|
113 |
+
|
114 |
+
Evaluate scholarly materials.
|
115 |
+
|
116 |
+
Serve on institutional or departmental committees.
|
117 |
+
|
118 |
+
Select educational materials or equipment.
|
119 |
+
|
120 |
+
Supervise student research or internship work.
|
121 |
+
|
122 |
+
Write reports or evaluations.
|
123 |
+
|
124 |
+
Maintain student records.
|
125 |
+
|
126 |
+
Prepare tests.
|
127 |
+
|
128 |
+
Order instructional or library materials or equipment.
|
129 |
+
|
130 |
+
Promote educational institutions or programs.
|
131 |
+
|
132 |
+
Develop instructional materials.'
|
133 |
+
- 'Calculate financial data.
|
134 |
+
|
135 |
+
Report maintenance or equipment problems to appropriate personnel.
|
136 |
+
|
137 |
+
Coordinate operational activities.
|
138 |
+
|
139 |
+
Supervise clerical or administrative personnel.
|
140 |
+
|
141 |
+
Compile data or documentation.
|
142 |
+
|
143 |
+
Examine documents to verify adherence to requirements.
|
144 |
+
|
145 |
+
Record personnel information.
|
146 |
+
|
147 |
+
Analyze financial information.
|
148 |
+
|
149 |
+
Perform administrative or clerical tasks.
|
150 |
+
|
151 |
+
Prepare employee work schedules.
|
152 |
+
|
153 |
+
Provide information to coworkers.
|
154 |
+
|
155 |
+
Plan facility layouts or designs.
|
156 |
+
|
157 |
+
Train personnel.
|
158 |
+
|
159 |
+
Develop organizational policies or programs.'
|
160 |
+
- 'Install parts, assemblies, or attachments in transportation or material handling
|
161 |
+
equipment.
|
162 |
+
|
163 |
+
Measure equipment outputs.
|
164 |
+
|
165 |
+
Disassemble equipment to inspect for deficiencies.
|
166 |
+
|
167 |
+
Grind parts to required dimensions.
|
168 |
+
|
169 |
+
Test mechanical equipment to ensure proper functioning.
|
170 |
+
|
171 |
+
Observe equipment in operation to detect potential problems.
|
172 |
+
|
173 |
+
Operate welding equipment.
|
174 |
+
|
175 |
+
Replace worn, damaged, or defective mechanical parts.
|
176 |
+
|
177 |
+
Remove dents from equipment, materials, tools or structures.
|
178 |
+
|
179 |
+
Assemble mechanical components or machine parts.
|
180 |
+
|
181 |
+
Repair defective engines or engine components.
|
182 |
+
|
183 |
+
Reassemble equipment after repair.'
|
184 |
+
- source_sentence: 'Follow up on results of counseling programs and clients'' adjustments
|
185 |
+
to determine effectiveness of programs.
|
186 |
+
|
187 |
+
Gather information from doctors, schools, social workers, juvenile counselors,
|
188 |
+
law enforcement personnel, and others to make recommendations to courts for resolution
|
189 |
+
of child custody or visitation disputes.
|
190 |
+
|
191 |
+
Confer with clients to develop plans for posttreatment activities.'
|
192 |
+
sentences:
|
193 |
+
- 'Inspect mechanical equipment to locate damage, defects, or wear.
|
194 |
+
|
195 |
+
Inspect electrical or electronic systems for defects.
|
196 |
+
|
197 |
+
Maintain work equipment or machinery.
|
198 |
+
|
199 |
+
Clean equipment, parts, or tools to repair or maintain them in good working order.
|
200 |
+
|
201 |
+
Repair electrical components.
|
202 |
+
|
203 |
+
Order materials, supplies, or equipment.'
|
204 |
+
- 'Clean facilities or sites.
|
205 |
+
|
206 |
+
Clean equipment or supplies.
|
207 |
+
|
208 |
+
Remove debris from work sites.'
|
209 |
+
- 'Help clients get needed services or resources.
|
210 |
+
|
211 |
+
Maintain client records.
|
212 |
+
|
213 |
+
Interview clients to gather information about their backgrounds, needs, or progress.
|
214 |
+
|
215 |
+
Counsel clients or patients regarding personal issues.
|
216 |
+
|
217 |
+
Supervise workers providing client or patient services.
|
218 |
+
|
219 |
+
Write reports or evaluations.
|
220 |
+
|
221 |
+
Refer clients to community or social service programs.
|
222 |
+
|
223 |
+
Collaborate with other professionals to assess client needs or plan treatments.
|
224 |
+
|
225 |
+
Lead classes or community events.
|
226 |
+
|
227 |
+
Collect information about clients.
|
228 |
+
|
229 |
+
Develop treatment plans for patients or clients.
|
230 |
+
|
231 |
+
Advise others on social or educational issues.
|
232 |
+
|
233 |
+
Present social services program information to the public.
|
234 |
+
|
235 |
+
Teach life skills or strategies to clients or their families.
|
236 |
+
|
237 |
+
Evaluate characteristics of individuals to determine needs or eligibility.
|
238 |
+
|
239 |
+
Confer with clients to discuss treatment plans or progress.'
|
240 |
+
- source_sentence: 'Perform medical procedures, such as administering oxygen, inserting
|
241 |
+
and removing airways, taking vital signs, or giving emergency treatment, such
|
242 |
+
as first aid or cardiopulmonary resuscitation (CPR).
|
243 |
+
|
244 |
+
Coordinate work with physicians or other healthcare team members, including providing
|
245 |
+
assistance during invasive procedures.
|
246 |
+
|
247 |
+
Process and code film from procedures and complete appropriate documentation.
|
248 |
+
|
249 |
+
Supervise or train students or other medical sonographers.
|
250 |
+
|
251 |
+
Operate ultrasound equipment to produce and record images of the motion, shape,
|
252 |
+
and composition of blood, organs, tissues, or bodily masses, such as fluid accumulations.'
|
253 |
+
sentences:
|
254 |
+
- 'Record vital statistics or other health information.
|
255 |
+
|
256 |
+
Assist practitioners to perform medical procedures.
|
257 |
+
|
258 |
+
Schedule patient procedures or appointments.
|
259 |
+
|
260 |
+
Explain technical medical information to patients.
|
261 |
+
|
262 |
+
Interview patients to gather medical information.
|
263 |
+
|
264 |
+
Inventory medical supplies or equipment.'
|
265 |
+
- 'Adjust settings or positions of medical equipment.
|
266 |
+
|
267 |
+
Create advanced digital images of patients using computer imaging systems.
|
268 |
+
|
269 |
+
Position patients for treatment or examination.
|
270 |
+
|
271 |
+
Collaborate with healthcare professionals to plan or provide treatment.
|
272 |
+
|
273 |
+
Process x-rays or other medical images.
|
274 |
+
|
275 |
+
Schedule patient procedures or appointments.
|
276 |
+
|
277 |
+
Order medical supplies or equipment.
|
278 |
+
|
279 |
+
Supervise patient care personnel.
|
280 |
+
|
281 |
+
Record patient medical histories.
|
282 |
+
|
283 |
+
Monitor patient conditions during treatments, procedures, or activities.
|
284 |
+
|
285 |
+
Operate diagnostic imaging equipment.
|
286 |
+
|
287 |
+
Analyze test data or images to inform diagnosis or treatment.
|
288 |
+
|
289 |
+
Clean medical equipment or facilities.
|
290 |
+
|
291 |
+
Repair medical facility equipment.
|
292 |
+
|
293 |
+
Examine medical instruments or equipment to ensure proper operation.
|
294 |
+
|
295 |
+
Communicate test or assessment results to medical professionals.
|
296 |
+
|
297 |
+
Maintain medical facility records.
|
298 |
+
|
299 |
+
Prepare official health documents or records.
|
300 |
+
|
301 |
+
Gather medical information from patient histories.'
|
302 |
+
- 'Test air quality at work sites.
|
303 |
+
|
304 |
+
Position construction or extraction equipment.
|
305 |
+
|
306 |
+
Record operational or environmental data.
|
307 |
+
|
308 |
+
Operate heavy-duty construction or installation equipment.
|
309 |
+
|
310 |
+
Assist skilled construction or extraction personnel.
|
311 |
+
|
312 |
+
Monitor construction operations.
|
313 |
+
|
314 |
+
Remove debris or vegetation from work sites.
|
315 |
+
|
316 |
+
Load or unload materials used in construction or extraction.
|
317 |
+
|
318 |
+
Operate road-surfacing equipment.
|
319 |
+
|
320 |
+
Select construction equipment.'
|
321 |
+
- source_sentence: 'Distribute materials, supplies, or subassemblies to work areas.
|
322 |
+
|
323 |
+
Complete, review, or maintain production, time, or component waste reports.
|
324 |
+
|
325 |
+
Adjust, repair, or replace electrical or electronic components to correct defects
|
326 |
+
and to ensure conformance to specifications.
|
327 |
+
|
328 |
+
Assemble electrical or electronic systems or support structures and install components,
|
329 |
+
units, subassemblies, wiring, or assembly casings, using rivets, bolts, soldering
|
330 |
+
or micro-welding equipment.
|
331 |
+
|
332 |
+
Fabricate or form parts, coils, or structures according to specifications, using
|
333 |
+
drills, calipers, cutters, or saws.'
|
334 |
+
sentences:
|
335 |
+
- 'Attend training sessions or professional meetings to develop or maintain professional
|
336 |
+
knowledge.
|
337 |
+
|
338 |
+
Supervise student research or internship work.
|
339 |
+
|
340 |
+
Evaluate effectiveness of educational programs.'
|
341 |
+
- 'Supervise service workers.
|
342 |
+
|
343 |
+
Manage budgets for personal services operations.
|
344 |
+
|
345 |
+
Arrange items for use or display.
|
346 |
+
|
347 |
+
Deliver items.
|
348 |
+
|
349 |
+
Prepare operational reports or records.
|
350 |
+
|
351 |
+
Collaborate with others to determine production details.'
|
352 |
+
- 'Mark products, workpieces, or equipment with identifying information.
|
353 |
+
|
354 |
+
Package products for storage or shipment.
|
355 |
+
|
356 |
+
Repair parts or assemblies.
|
357 |
+
|
358 |
+
Instruct workers to use equipment or perform technical procedures.
|
359 |
+
|
360 |
+
Exchange information with colleagues.
|
361 |
+
|
362 |
+
Drill holes in parts, equipment, or materials.
|
363 |
+
|
364 |
+
Review blueprints or other instructions to determine operational methods or sequences.
|
365 |
+
|
366 |
+
Distribute supplies to workers.
|
367 |
+
|
368 |
+
Assemble electrical or electronic equipment.
|
369 |
+
|
370 |
+
Confer with others to resolve production problems or equipment malfunctions.
|
371 |
+
|
372 |
+
Test electrical equipment or systems to ensure proper functioning.
|
373 |
+
|
374 |
+
Adjust flow of electricity to tools or production equipment.
|
375 |
+
|
376 |
+
Operate welding equipment.
|
377 |
+
|
378 |
+
Clean workpieces or finished products.
|
379 |
+
|
380 |
+
Read work orders or other instructions to determine product specifications or
|
381 |
+
materials requirements.
|
382 |
+
|
383 |
+
Record operational or production data.'
|
384 |
+
---
|
385 |
+
|
386 |
+
# SentenceTransformer based on mixedbread-ai/mxbai-embed-large-v1
|
387 |
+
|
388 |
+
This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [mixedbread-ai/mxbai-embed-large-v1](https://huggingface.co/mixedbread-ai/mxbai-embed-large-v1). It maps sentences & paragraphs to a 1024-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
|
389 |
+
|
390 |
+
## Model Details
|
391 |
+
|
392 |
+
### Model Description
|
393 |
+
- **Model Type:** Sentence Transformer
|
394 |
+
- **Base model:** [mixedbread-ai/mxbai-embed-large-v1](https://huggingface.co/mixedbread-ai/mxbai-embed-large-v1) <!-- at revision 526dc52cb738085d87002bf00ca4d3d99fd0029b -->
|
395 |
+
- **Maximum Sequence Length:** 128 tokens
|
396 |
+
- **Output Dimensionality:** 1024 dimensions
|
397 |
+
- **Similarity Function:** Cosine Similarity
|
398 |
+
<!-- - **Training Dataset:** Unknown -->
|
399 |
+
<!-- - **Language:** Unknown -->
|
400 |
+
<!-- - **License:** Unknown -->
|
401 |
+
|
402 |
+
### Model Sources
|
403 |
+
|
404 |
+
- **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
|
405 |
+
- **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
|
406 |
+
- **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
|
407 |
+
|
408 |
+
### Full Model Architecture
|
409 |
+
|
410 |
+
```
|
411 |
+
SentenceTransformer(
|
412 |
+
(0): Transformer({'max_seq_length': 128, 'do_lower_case': False}) with Transformer model: BertModel
|
413 |
+
(1): Pooling({'word_embedding_dimension': 1024, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
|
414 |
+
)
|
415 |
+
```
|
416 |
+
|
417 |
+
## Usage
|
418 |
+
|
419 |
+
### Direct Usage (Sentence Transformers)
|
420 |
+
|
421 |
+
First install the Sentence Transformers library:
|
422 |
+
|
423 |
+
```bash
|
424 |
+
pip install -U sentence-transformers
|
425 |
+
```
|
426 |
+
|
427 |
+
Then you can load this model and run inference.
|
428 |
+
```python
|
429 |
+
from sentence_transformers import SentenceTransformer
|
430 |
+
|
431 |
+
# Download from the 🤗 Hub
|
432 |
+
model = SentenceTransformer("Daxtra/onet_sbert")
|
433 |
+
# Run inference
|
434 |
+
sentences = [
|
435 |
+
'Distribute materials, supplies, or subassemblies to work areas.\nComplete, review, or maintain production, time, or component waste reports.\nAdjust, repair, or replace electrical or electronic components to correct defects and to ensure conformance to specifications.\nAssemble electrical or electronic systems or support structures and install components, units, subassemblies, wiring, or assembly casings, using rivets, bolts, soldering or micro-welding equipment.\nFabricate or form parts, coils, or structures according to specifications, using drills, calipers, cutters, or saws.',
|
436 |
+
'Mark products, workpieces, or equipment with identifying information.\nPackage products for storage or shipment.\nRepair parts or assemblies.\nInstruct workers to use equipment or perform technical procedures.\nExchange information with colleagues.\nDrill holes in parts, equipment, or materials.\nReview blueprints or other instructions to determine operational methods or sequences.\nDistribute supplies to workers.\nAssemble electrical or electronic equipment.\nConfer with others to resolve production problems or equipment malfunctions.\nTest electrical equipment or systems to ensure proper functioning.\nAdjust flow of electricity to tools or production equipment.\nOperate welding equipment.\nClean workpieces or finished products.\nRead work orders or other instructions to determine product specifications or materials requirements.\nRecord operational or production data.',
|
437 |
+
'Supervise service workers.\nManage budgets for personal services operations.\nArrange items for use or display.\nDeliver items.\nPrepare operational reports or records.\nCollaborate with others to determine production details.',
|
438 |
+
]
|
439 |
+
embeddings = model.encode(sentences)
|
440 |
+
print(embeddings.shape)
|
441 |
+
# [3, 1024]
|
442 |
+
|
443 |
+
# Get the similarity scores for the embeddings
|
444 |
+
similarities = model.similarity(embeddings, embeddings)
|
445 |
+
print(similarities.shape)
|
446 |
+
# [3, 3]
|
447 |
+
```
|
448 |
+
|
449 |
+
<!--
|
450 |
+
### Direct Usage (Transformers)
|
451 |
+
|
452 |
+
<details><summary>Click to see the direct usage in Transformers</summary>
|
453 |
+
|
454 |
+
</details>
|
455 |
+
-->
|
456 |
+
|
457 |
+
<!--
|
458 |
+
### Downstream Usage (Sentence Transformers)
|
459 |
+
|
460 |
+
You can finetune this model on your own dataset.
|
461 |
+
|
462 |
+
<details><summary>Click to expand</summary>
|
463 |
+
|
464 |
+
</details>
|
465 |
+
-->
|
466 |
+
|
467 |
+
<!--
|
468 |
+
### Out-of-Scope Use
|
469 |
+
|
470 |
+
*List how the model may foreseeably be misused and address what users ought not to do with the model.*
|
471 |
+
-->
|
472 |
+
|
473 |
+
<!--
|
474 |
+
## Bias, Risks and Limitations
|
475 |
+
|
476 |
+
*What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
|
477 |
+
-->
|
478 |
+
|
479 |
+
<!--
|
480 |
+
### Recommendations
|
481 |
+
|
482 |
+
*What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
|
483 |
+
-->
|
484 |
+
|
485 |
+
## Training Details
|
486 |
+
|
487 |
+
### Training Dataset
|
488 |
+
|
489 |
+
#### Unnamed Dataset
|
490 |
+
|
491 |
+
|
492 |
+
* Size: 4,488 training samples
|
493 |
+
* Columns: <code>sentence_0</code> and <code>sentence_1</code>
|
494 |
+
* Approximate statistics based on the first 1000 samples:
|
495 |
+
| | sentence_0 | sentence_1 |
|
496 |
+
|:--------|:------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------|
|
497 |
+
| type | string | string |
|
498 |
+
| details | <ul><li>min: 22 tokens</li><li>mean: 81.13 tokens</li><li>max: 128 tokens</li></ul> | <ul><li>min: 15 tokens</li><li>mean: 76.36 tokens</li><li>max: 128 tokens</li></ul> |
|
499 |
+
* Samples:
|
500 |
+
| sentence_0 | sentence_1 |
|
501 |
+
|:-----------|:-----------|
|
502 |
+
| <code>Perform business management duties, such as maintaining records or files, preparing reports, or ordering supplies or equipment.<br>Maintain current electrician's license or identification card to meet governmental regulations.<br>Inspect electrical systems, equipment, or components to identify hazards, defects, or the need for adjustment or repair, and to ensure compliance with codes.<br>Perform physically demanding tasks, such as digging trenches to lay conduit or moving or lifting heavy objects.<br>Construct or fabricate parts, using hand tools, according to specifications.</code> | <code>Dig holes or trenches.<br>Communicate with other construction or extraction personnel to discuss project details.<br>Update job related knowledge or skills.<br>Thread wire or cable through ducts or conduits.<br>Repair electrical equipment.<br>Assist skilled construction or extraction personnel.<br>Test electrical equipment or systems to ensure proper functioning.<br>Train construction or extraction personnel.<br>Direct construction or extraction personnel.<br>Fabricate parts or components.<br>Plan layout of construction, installation, or repairs.<br>Create construction or installation diagrams.<br>Install electrical components, equipment, or systems.</code> |
|
503 |
+
| <code>Direct the work of nurses, residents, or other staff to provide patient care.<br>Treat lower urinary tract dysfunctions using equipment such as diathermy machines, catheters, cystoscopes, or radium emanation tubes.<br>Treat urologic disorders using alternatives to traditional surgery such as extracorporeal shock wave lithotripsy, laparoscopy, or laser techniques.<br>Document or review patients' histories.<br>Perform brachytherapy, cryotherapy, high intensity focused ultrasound (HIFU), or photodynamic therapy to treat prostate or other cancers.<br>Provide urology consultation to physicians or other health care professionals.</code> | <code>Train medical providers.<br>Operate diagnostic imaging equipment.<br>Gather medical information from patient histories.<br>Advise medical personnel regarding healthcare issues.<br>Administer non-intravenous medications.<br>Supervise patient care personnel.<br>Record patient medical histories.<br>Prescribe medications.<br>Diagnose medical conditions.</code> |
|
504 |
+
| <code>Examine roadway and clear obstructions from the path of travel.<br>Observe hand signals, grade stakes, or other markings when operating machines.<br>Read written instructions or confer with supervisors about schedules and materials to be moved.<br>Measure, weigh, or verify levels of rock, gravel, or other excavated material to prevent equipment overloads.<br>Monitor loading processes to ensure that materials are loaded according to specifications.</code> | <code>Connect cables or electrical lines.<br>Remove debris or damaged materials.<br>Operate conveyors or other industrial material moving equipment.</code> |
|
505 |
+
* Loss: [<code>MultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters:
|
506 |
+
```json
|
507 |
+
{
|
508 |
+
"scale": 20.0,
|
509 |
+
"similarity_fct": "cos_sim"
|
510 |
+
}
|
511 |
+
```
|
512 |
+
|
513 |
+
### Training Hyperparameters
|
514 |
+
#### Non-Default Hyperparameters
|
515 |
+
|
516 |
+
- `eval_strategy`: steps
|
517 |
+
- `per_device_train_batch_size`: 24
|
518 |
+
- `per_device_eval_batch_size`: 24
|
519 |
+
- `num_train_epochs`: 1
|
520 |
+
- `multi_dataset_batch_sampler`: round_robin
|
521 |
+
|
522 |
+
#### All Hyperparameters
|
523 |
+
<details><summary>Click to expand</summary>
|
524 |
+
|
525 |
+
- `overwrite_output_dir`: False
|
526 |
+
- `do_predict`: False
|
527 |
+
- `eval_strategy`: steps
|
528 |
+
- `prediction_loss_only`: True
|
529 |
+
- `per_device_train_batch_size`: 24
|
530 |
+
- `per_device_eval_batch_size`: 24
|
531 |
+
- `per_gpu_train_batch_size`: None
|
532 |
+
- `per_gpu_eval_batch_size`: None
|
533 |
+
- `gradient_accumulation_steps`: 1
|
534 |
+
- `eval_accumulation_steps`: None
|
535 |
+
- `torch_empty_cache_steps`: None
|
536 |
+
- `learning_rate`: 5e-05
|
537 |
+
- `weight_decay`: 0.0
|
538 |
+
- `adam_beta1`: 0.9
|
539 |
+
- `adam_beta2`: 0.999
|
540 |
+
- `adam_epsilon`: 1e-08
|
541 |
+
- `max_grad_norm`: 1
|
542 |
+
- `num_train_epochs`: 1
|
543 |
+
- `max_steps`: -1
|
544 |
+
- `lr_scheduler_type`: linear
|
545 |
+
- `lr_scheduler_kwargs`: {}
|
546 |
+
- `warmup_ratio`: 0.0
|
547 |
+
- `warmup_steps`: 0
|
548 |
+
- `log_level`: passive
|
549 |
+
- `log_level_replica`: warning
|
550 |
+
- `log_on_each_node`: True
|
551 |
+
- `logging_nan_inf_filter`: True
|
552 |
+
- `save_safetensors`: True
|
553 |
+
- `save_on_each_node`: False
|
554 |
+
- `save_only_model`: False
|
555 |
+
- `restore_callback_states_from_checkpoint`: False
|
556 |
+
- `no_cuda`: False
|
557 |
+
- `use_cpu`: False
|
558 |
+
- `use_mps_device`: False
|
559 |
+
- `seed`: 42
|
560 |
+
- `data_seed`: None
|
561 |
+
- `jit_mode_eval`: False
|
562 |
+
- `use_ipex`: False
|
563 |
+
- `bf16`: False
|
564 |
+
- `fp16`: False
|
565 |
+
- `fp16_opt_level`: O1
|
566 |
+
- `half_precision_backend`: auto
|
567 |
+
- `bf16_full_eval`: False
|
568 |
+
- `fp16_full_eval`: False
|
569 |
+
- `tf32`: None
|
570 |
+
- `local_rank`: 0
|
571 |
+
- `ddp_backend`: None
|
572 |
+
- `tpu_num_cores`: None
|
573 |
+
- `tpu_metrics_debug`: False
|
574 |
+
- `debug`: []
|
575 |
+
- `dataloader_drop_last`: False
|
576 |
+
- `dataloader_num_workers`: 0
|
577 |
+
- `dataloader_prefetch_factor`: None
|
578 |
+
- `past_index`: -1
|
579 |
+
- `disable_tqdm`: False
|
580 |
+
- `remove_unused_columns`: True
|
581 |
+
- `label_names`: None
|
582 |
+
- `load_best_model_at_end`: False
|
583 |
+
- `ignore_data_skip`: False
|
584 |
+
- `fsdp`: []
|
585 |
+
- `fsdp_min_num_params`: 0
|
586 |
+
- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
|
587 |
+
- `fsdp_transformer_layer_cls_to_wrap`: None
|
588 |
+
- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
|
589 |
+
- `deepspeed`: None
|
590 |
+
- `label_smoothing_factor`: 0.0
|
591 |
+
- `optim`: adamw_torch
|
592 |
+
- `optim_args`: None
|
593 |
+
- `adafactor`: False
|
594 |
+
- `group_by_length`: False
|
595 |
+
- `length_column_name`: length
|
596 |
+
- `ddp_find_unused_parameters`: None
|
597 |
+
- `ddp_bucket_cap_mb`: None
|
598 |
+
- `ddp_broadcast_buffers`: False
|
599 |
+
- `dataloader_pin_memory`: True
|
600 |
+
- `dataloader_persistent_workers`: False
|
601 |
+
- `skip_memory_metrics`: True
|
602 |
+
- `use_legacy_prediction_loop`: False
|
603 |
+
- `push_to_hub`: False
|
604 |
+
- `resume_from_checkpoint`: None
|
605 |
+
- `hub_model_id`: None
|
606 |
+
- `hub_strategy`: every_save
|
607 |
+
- `hub_private_repo`: False
|
608 |
+
- `hub_always_push`: False
|
609 |
+
- `gradient_checkpointing`: False
|
610 |
+
- `gradient_checkpointing_kwargs`: None
|
611 |
+
- `include_inputs_for_metrics`: False
|
612 |
+
- `eval_do_concat_batches`: True
|
613 |
+
- `fp16_backend`: auto
|
614 |
+
- `push_to_hub_model_id`: None
|
615 |
+
- `push_to_hub_organization`: None
|
616 |
+
- `mp_parameters`:
|
617 |
+
- `auto_find_batch_size`: False
|
618 |
+
- `full_determinism`: False
|
619 |
+
- `torchdynamo`: None
|
620 |
+
- `ray_scope`: last
|
621 |
+
- `ddp_timeout`: 1800
|
622 |
+
- `torch_compile`: False
|
623 |
+
- `torch_compile_backend`: None
|
624 |
+
- `torch_compile_mode`: None
|
625 |
+
- `dispatch_batches`: None
|
626 |
+
- `split_batches`: None
|
627 |
+
- `include_tokens_per_second`: False
|
628 |
+
- `include_num_input_tokens_seen`: False
|
629 |
+
- `neftune_noise_alpha`: None
|
630 |
+
- `optim_target_modules`: None
|
631 |
+
- `batch_eval_metrics`: False
|
632 |
+
- `eval_on_start`: False
|
633 |
+
- `eval_use_gather_object`: False
|
634 |
+
- `batch_sampler`: batch_sampler
|
635 |
+
- `multi_dataset_batch_sampler`: round_robin
|
636 |
+
|
637 |
+
</details>
|
638 |
+
|
639 |
+
### Training Logs
|
640 |
+
| Epoch | Step |
|
641 |
+
|:------:|:----:|
|
642 |
+
| 0.0963 | 18 |
|
643 |
+
| 0.1925 | 36 |
|
644 |
+
| 0.2888 | 54 |
|
645 |
+
| 0.3850 | 72 |
|
646 |
+
| 0.4813 | 90 |
|
647 |
+
| 0.5775 | 108 |
|
648 |
+
| 0.6738 | 126 |
|
649 |
+
| 0.7701 | 144 |
|
650 |
+
| 0.8663 | 162 |
|
651 |
+
| 0.9626 | 180 |
|
652 |
+
| 1.0 | 187 |
|
653 |
+
|
654 |
+
|
655 |
+
### Framework Versions
|
656 |
+
- Python: 3.10.12
|
657 |
+
- Sentence Transformers: 3.2.0
|
658 |
+
- Transformers: 4.44.2
|
659 |
+
- PyTorch: 2.4.1+cu121
|
660 |
+
- Accelerate: 0.34.2
|
661 |
+
- Datasets: 3.0.1
|
662 |
+
- Tokenizers: 0.19.1
|
663 |
+
|
664 |
+
## Citation
|
665 |
+
|
666 |
+
### BibTeX
|
667 |
+
|
668 |
+
#### Sentence Transformers
|
669 |
+
```bibtex
|
670 |
+
@inproceedings{reimers-2019-sentence-bert,
|
671 |
+
title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
|
672 |
+
author = "Reimers, Nils and Gurevych, Iryna",
|
673 |
+
booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
|
674 |
+
month = "11",
|
675 |
+
year = "2019",
|
676 |
+
publisher = "Association for Computational Linguistics",
|
677 |
+
url = "https://arxiv.org/abs/1908.10084",
|
678 |
+
}
|
679 |
+
```
|
680 |
+
|
681 |
+
#### MultipleNegativesRankingLoss
|
682 |
+
```bibtex
|
683 |
+
@misc{henderson2017efficient,
|
684 |
+
title={Efficient Natural Language Response Suggestion for Smart Reply},
|
685 |
+
author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil},
|
686 |
+
year={2017},
|
687 |
+
eprint={1705.00652},
|
688 |
+
archivePrefix={arXiv},
|
689 |
+
primaryClass={cs.CL}
|
690 |
+
}
|
691 |
+
```
|
692 |
+
|
693 |
+
<!--
|
694 |
+
## Glossary
|
695 |
+
|
696 |
+
*Clearly define terms in order to be accessible across audiences.*
|
697 |
+
-->
|
698 |
+
|
699 |
+
<!--
|
700 |
+
## Model Card Authors
|
701 |
+
|
702 |
+
*Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
|
703 |
+
-->
|
704 |
+
|
705 |
+
<!--
|
706 |
+
## Model Card Contact
|
707 |
+
|
708 |
+
*Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
|
709 |
+
-->
|
config.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "mixedbread-ai/mxbai-embed-large-v1",
|
3 |
+
"architectures": [
|
4 |
+
"BertModel"
|
5 |
+
],
|
6 |
+
"attention_probs_dropout_prob": 0.1,
|
7 |
+
"classifier_dropout": null,
|
8 |
+
"gradient_checkpointing": false,
|
9 |
+
"hidden_act": "gelu",
|
10 |
+
"hidden_dropout_prob": 0.1,
|
11 |
+
"hidden_size": 1024,
|
12 |
+
"initializer_range": 0.02,
|
13 |
+
"intermediate_size": 4096,
|
14 |
+
"layer_norm_eps": 1e-12,
|
15 |
+
"max_position_embeddings": 512,
|
16 |
+
"model_type": "bert",
|
17 |
+
"num_attention_heads": 16,
|
18 |
+
"num_hidden_layers": 24,
|
19 |
+
"pad_token_id": 0,
|
20 |
+
"position_embedding_type": "absolute",
|
21 |
+
"torch_dtype": "float32",
|
22 |
+
"transformers_version": "4.44.2",
|
23 |
+
"type_vocab_size": 2,
|
24 |
+
"use_cache": false,
|
25 |
+
"vocab_size": 30522
|
26 |
+
}
|
config_sentence_transformers.json
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"__version__": {
|
3 |
+
"sentence_transformers": "3.2.0",
|
4 |
+
"transformers": "4.44.2",
|
5 |
+
"pytorch": "2.4.1+cu121"
|
6 |
+
},
|
7 |
+
"prompts": {},
|
8 |
+
"default_prompt_name": null,
|
9 |
+
"similarity_fn_name": null
|
10 |
+
}
|
model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3265d7cc00351ba11e39af0803908915144f651c1e344e70e976a063411806eb
|
3 |
+
size 1340612432
|
modules.json
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[
|
2 |
+
{
|
3 |
+
"idx": 0,
|
4 |
+
"name": "0",
|
5 |
+
"path": "",
|
6 |
+
"type": "sentence_transformers.models.Transformer"
|
7 |
+
},
|
8 |
+
{
|
9 |
+
"idx": 1,
|
10 |
+
"name": "1",
|
11 |
+
"path": "1_Pooling",
|
12 |
+
"type": "sentence_transformers.models.Pooling"
|
13 |
+
}
|
14 |
+
]
|
sentence_bert_config.json
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"max_seq_length": 128,
|
3 |
+
"do_lower_case": false
|
4 |
+
}
|
special_tokens_map.json
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cls_token": {
|
3 |
+
"content": "[CLS]",
|
4 |
+
"lstrip": false,
|
5 |
+
"normalized": false,
|
6 |
+
"rstrip": false,
|
7 |
+
"single_word": false
|
8 |
+
},
|
9 |
+
"mask_token": {
|
10 |
+
"content": "[MASK]",
|
11 |
+
"lstrip": false,
|
12 |
+
"normalized": false,
|
13 |
+
"rstrip": false,
|
14 |
+
"single_word": false
|
15 |
+
},
|
16 |
+
"pad_token": {
|
17 |
+
"content": "[PAD]",
|
18 |
+
"lstrip": false,
|
19 |
+
"normalized": false,
|
20 |
+
"rstrip": false,
|
21 |
+
"single_word": false
|
22 |
+
},
|
23 |
+
"sep_token": {
|
24 |
+
"content": "[SEP]",
|
25 |
+
"lstrip": false,
|
26 |
+
"normalized": false,
|
27 |
+
"rstrip": false,
|
28 |
+
"single_word": false
|
29 |
+
},
|
30 |
+
"unk_token": {
|
31 |
+
"content": "[UNK]",
|
32 |
+
"lstrip": false,
|
33 |
+
"normalized": false,
|
34 |
+
"rstrip": false,
|
35 |
+
"single_word": false
|
36 |
+
}
|
37 |
+
}
|
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
tokenizer_config.json
ADDED
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"added_tokens_decoder": {
|
3 |
+
"0": {
|
4 |
+
"content": "[PAD]",
|
5 |
+
"lstrip": false,
|
6 |
+
"normalized": false,
|
7 |
+
"rstrip": false,
|
8 |
+
"single_word": false,
|
9 |
+
"special": true
|
10 |
+
},
|
11 |
+
"100": {
|
12 |
+
"content": "[UNK]",
|
13 |
+
"lstrip": false,
|
14 |
+
"normalized": false,
|
15 |
+
"rstrip": false,
|
16 |
+
"single_word": false,
|
17 |
+
"special": true
|
18 |
+
},
|
19 |
+
"101": {
|
20 |
+
"content": "[CLS]",
|
21 |
+
"lstrip": false,
|
22 |
+
"normalized": false,
|
23 |
+
"rstrip": false,
|
24 |
+
"single_word": false,
|
25 |
+
"special": true
|
26 |
+
},
|
27 |
+
"102": {
|
28 |
+
"content": "[SEP]",
|
29 |
+
"lstrip": false,
|
30 |
+
"normalized": false,
|
31 |
+
"rstrip": false,
|
32 |
+
"single_word": false,
|
33 |
+
"special": true
|
34 |
+
},
|
35 |
+
"103": {
|
36 |
+
"content": "[MASK]",
|
37 |
+
"lstrip": false,
|
38 |
+
"normalized": false,
|
39 |
+
"rstrip": false,
|
40 |
+
"single_word": false,
|
41 |
+
"special": true
|
42 |
+
}
|
43 |
+
},
|
44 |
+
"clean_up_tokenization_spaces": true,
|
45 |
+
"cls_token": "[CLS]",
|
46 |
+
"do_basic_tokenize": true,
|
47 |
+
"do_lower_case": true,
|
48 |
+
"mask_token": "[MASK]",
|
49 |
+
"model_max_length": 128,
|
50 |
+
"never_split": null,
|
51 |
+
"pad_token": "[PAD]",
|
52 |
+
"sep_token": "[SEP]",
|
53 |
+
"strip_accents": null,
|
54 |
+
"tokenize_chinese_chars": true,
|
55 |
+
"tokenizer_class": "BertTokenizer",
|
56 |
+
"unk_token": "[UNK]"
|
57 |
+
}
|
vocab.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|