Add new SentenceTransformer model
Browse files
- 1_Pooling/config.json +10 -0
- README.md +815 -0
- config.json +26 -0
- config_sentence_transformers.json +10 -0
- model.safetensors +3 -0
- modules.json +14 -0
- sentence_bert_config.json +4 -0
- special_tokens_map.json +37 -0
- tokenizer.json +0 -0
- tokenizer_config.json +57 -0
- vocab.txt +0 -0
1_Pooling/config.json
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"word_embedding_dimension": 1024,
|
3 |
+
"pooling_mode_cls_token": false,
|
4 |
+
"pooling_mode_mean_tokens": true,
|
5 |
+
"pooling_mode_max_tokens": false,
|
6 |
+
"pooling_mode_mean_sqrt_len_tokens": false,
|
7 |
+
"pooling_mode_weightedmean_tokens": false,
|
8 |
+
"pooling_mode_lasttoken": false,
|
9 |
+
"include_prompt": true
|
10 |
+
}
|
README.md
ADDED
@@ -0,0 +1,815 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
base_model: mixedbread-ai/mxbai-embed-large-v1
|
3 |
+
library_name: sentence-transformers
|
4 |
+
pipeline_tag: sentence-similarity
|
5 |
+
tags:
|
6 |
+
- sentence-transformers
|
7 |
+
- sentence-similarity
|
8 |
+
- feature-extraction
|
9 |
+
- generated_from_trainer
|
10 |
+
- dataset_size:5856
|
11 |
+
- loss:MultipleNegativesRankingLoss
|
12 |
+
widget:
|
13 |
+
- source_sentence: 'Supervise the training of student opticians.
|
14 |
+
|
15 |
+
Repair damaged frames.
|
16 |
+
|
17 |
+
Order and purchase frames and lenses.
|
18 |
+
|
19 |
+
Verify that finished lenses are ground to specifications.
|
20 |
+
|
21 |
+
Show customers how to insert, remove, and care for their contact lenses.
|
22 |
+
|
23 |
+
Prepare work orders and instructions for grinding lenses and fabricating eyeglasses.'
|
24 |
+
sentences:
|
25 |
+
- 'Fabricate medical devices.
|
26 |
+
|
27 |
+
Verify accuracy of patient information.
|
28 |
+
|
29 |
+
Train medical providers.
|
30 |
+
|
31 |
+
Measure the physical or physiological attributes of patients.
|
32 |
+
|
33 |
+
Fit eyeglasses, contact lenses, or other vision aids.
|
34 |
+
|
35 |
+
Recommend types of assistive devices.
|
36 |
+
|
37 |
+
Merchandise healthcare products or services.
|
38 |
+
|
39 |
+
Perform clerical work in medical settings.
|
40 |
+
|
41 |
+
Operate diagnostic or therapeutic medical instruments or equipment.'
|
42 |
+
- 'Adjust vehicle components according to specifications.
|
43 |
+
|
44 |
+
Install mechanical components in production equipment.
|
45 |
+
|
46 |
+
Inspect installed components or assemblies.
|
47 |
+
|
48 |
+
Sort recyclable materials.
|
49 |
+
|
50 |
+
Mount attachments or tools onto production equipment.
|
51 |
+
|
52 |
+
Review blueprints or other instructions to determine operational methods or sequences.'
|
53 |
+
- 'Estimate construction project costs.
|
54 |
+
|
55 |
+
Apply sealants or other protective coatings.
|
56 |
+
|
57 |
+
Cut metal components for installation.
|
58 |
+
|
59 |
+
Estimate construction project labor requirements.
|
60 |
+
|
61 |
+
Mark reference points on construction materials.
|
62 |
+
|
63 |
+
Smooth surfaces with abrasive materials or tools.
|
64 |
+
|
65 |
+
Remove worn, damaged or outdated materials from work areas.
|
66 |
+
|
67 |
+
Determine construction project layouts.
|
68 |
+
|
69 |
+
Install building fixtures.
|
70 |
+
|
71 |
+
Measure work site dimensions.'
|
72 |
+
- source_sentence: 'Provide sample garments to agents and sales representatives, and
|
73 |
+
arrange for showings of sample garments at sales meetings or fashion shows.
|
74 |
+
|
75 |
+
Research the styles and periods of clothing needed for film or theatrical productions.
|
76 |
+
|
77 |
+
Read scripts and consult directors and other production staff to develop design
|
78 |
+
concepts and plan productions.
|
79 |
+
|
80 |
+
Sketch rough and detailed drawings of apparel or accessories, and write specifications
|
81 |
+
such as color schemes, construction, material types, and accessory requirements.
|
82 |
+
|
83 |
+
Examine sample garments on and off models, modifying designs to achieve desired
|
84 |
+
effects.
|
85 |
+
|
86 |
+
Test fabrics or oversee testing so that garment care labels can be created.'
|
87 |
+
sentences:
|
88 |
+
- 'Measure distances or dimensions.
|
89 |
+
|
90 |
+
Install programs onto computer or computer-controlled equipment.
|
91 |
+
|
92 |
+
Determine types of equipment, tools, or materials needed for jobs.
|
93 |
+
|
94 |
+
Troubleshoot equipment or systems operation problems.
|
95 |
+
|
96 |
+
Enter codes or other information into computers.
|
97 |
+
|
98 |
+
Confer with coworkers to resolve equipment problems.
|
99 |
+
|
100 |
+
Explain use of products or services.
|
101 |
+
|
102 |
+
Maintain work equipment or machinery.
|
103 |
+
|
104 |
+
Test communications equipment to ensure proper functioning.
|
105 |
+
|
106 |
+
Lubricate equipment to allow proper functioning.
|
107 |
+
|
108 |
+
Repair electronic equipment.
|
109 |
+
|
110 |
+
Run wiring to connect equipment.
|
111 |
+
|
112 |
+
Service vehicles to maintain functionality.
|
113 |
+
|
114 |
+
Advise others on issues related to repairs, installation, or equipment design.
|
115 |
+
|
116 |
+
Document operational activities.
|
117 |
+
|
118 |
+
Repair electrical components.
|
119 |
+
|
120 |
+
Gather information about work conditions or locations.
|
121 |
+
|
122 |
+
Adjust equipment to ensure optimal performance.
|
123 |
+
|
124 |
+
Verify information or specifications.
|
125 |
+
|
126 |
+
Climb equipment or structures to access work areas.
|
127 |
+
|
128 |
+
Install electrical components, equipment, or systems.
|
129 |
+
|
130 |
+
Read technical information needed to perform maintenance or repairs.
|
131 |
+
|
132 |
+
Drive trucks or other vehicles to or at work sites.
|
133 |
+
|
134 |
+
Analyze test or performance data to assess equipment operation.
|
135 |
+
|
136 |
+
Assemble electrical components, subsystems, or systems.
|
137 |
+
|
138 |
+
Paint surfaces or equipment.
|
139 |
+
|
140 |
+
Test electrical circuits or components for proper functioning.
|
141 |
+
|
142 |
+
Rewire electrical or electronic systems.
|
143 |
+
|
144 |
+
Clean work areas.
|
145 |
+
|
146 |
+
Investigate legal issues.
|
147 |
+
|
148 |
+
Connect electrical components or equipment.
|
149 |
+
|
150 |
+
Interpret blueprints, specifications, or diagrams to inform installation, development
|
151 |
+
or operation activities.
|
152 |
+
|
153 |
+
Dig holes or trenches.
|
154 |
+
|
155 |
+
Clean equipment, parts, or tools to repair or maintain them in good working order.'
|
156 |
+
- 'Verify information or specifications.
|
157 |
+
|
158 |
+
Control pumps or pumping equipment.
|
159 |
+
|
160 |
+
Load shipments, belongings, or materials.
|
161 |
+
|
162 |
+
Clean vessels or marine equipment.
|
163 |
+
|
164 |
+
Maintain material moving equipment in good working condition.
|
165 |
+
|
166 |
+
Record operational details of travel.
|
167 |
+
|
168 |
+
Signal others to coordinate vehicle movement.
|
169 |
+
|
170 |
+
Measure the level or depth of water or other liquids.
|
171 |
+
|
172 |
+
Operate ships or other watercraft.
|
173 |
+
|
174 |
+
Paint surfaces or equipment.
|
175 |
+
|
176 |
+
Inspect equipment to ensure proper functioning.
|
177 |
+
|
178 |
+
Inspect material-moving equipment to detect problems.
|
179 |
+
|
180 |
+
Monitor equipment gauges or displays to ensure proper operation.
|
181 |
+
|
182 |
+
Secure watercraft to docks, wharves or other vessels.'
|
183 |
+
- 'Draw detailed or technical illustrations.
|
184 |
+
|
185 |
+
Conduct research to inform art, designs, or other work.
|
186 |
+
|
187 |
+
Maintain inventories of materials, equipment, or products.
|
188 |
+
|
189 |
+
Conduct market research.
|
190 |
+
|
191 |
+
Develop artistic or design concepts for decoration, exhibition, or commercial
|
192 |
+
purposes.
|
193 |
+
|
194 |
+
Promote products, activities, or organizations.
|
195 |
+
|
196 |
+
Build models, patterns, or templates.
|
197 |
+
|
198 |
+
Coordinate design activities.
|
199 |
+
|
200 |
+
Study scripts to determine project requirements.
|
201 |
+
|
202 |
+
Monitor current trends.
|
203 |
+
|
204 |
+
Write informational material.'
|
205 |
+
- source_sentence: 'Raise and shape clay into wares, such as vases and pitchers, on
|
206 |
+
revolving wheels, using hands, fingers, and thumbs.
|
207 |
+
|
208 |
+
Maintain supplies of tools, equipment, and materials, and order additional supplies
|
209 |
+
as needed.
|
210 |
+
|
211 |
+
Operate pug mills to blend and extrude clay.
|
212 |
+
|
213 |
+
Attach handles to pottery pieces.
|
214 |
+
|
215 |
+
Position balls of clay in centers of potters'' wheels, and start motors or pump
|
216 |
+
treadles with feet to revolve wheels.
|
217 |
+
|
218 |
+
Start machine units and conveyors and observe lights and gauges on panel board
|
219 |
+
to verify operational efficiency.
|
220 |
+
|
221 |
+
Adjust wheel speeds according to the feel of the clay as pieces enlarge and walls
|
222 |
+
become thinner.'
|
223 |
+
sentences:
|
224 |
+
- 'Apply protective or decorative finishes to workpieces or products.
|
225 |
+
|
226 |
+
Maneuver workpieces in equipment during production.
|
227 |
+
|
228 |
+
Operate heating or drying equipment.
|
229 |
+
|
230 |
+
Set equipment controls to meet cutting specifications.
|
231 |
+
|
232 |
+
Send information, materials or documentation.
|
233 |
+
|
234 |
+
Monitor equipment operation to ensure proper functioning.
|
235 |
+
|
236 |
+
Construct distinctive physical objects for artistic, functional, or commercial
|
237 |
+
purposes.
|
238 |
+
|
239 |
+
Develop artistic or design concepts for decoration, exhibition, or commercial
|
240 |
+
purposes.
|
241 |
+
|
242 |
+
Position raw materials on processing or production equipment.
|
243 |
+
|
244 |
+
Smooth surfaces of objects or equipment.
|
245 |
+
|
246 |
+
Operate mixing equipment.
|
247 |
+
|
248 |
+
Teach classes in area of specialization.
|
249 |
+
|
250 |
+
Measure dimensions of completed products or workpieces to verify conformance to
|
251 |
+
specifications.
|
252 |
+
|
253 |
+
Develop professional relationships or networks.
|
254 |
+
|
255 |
+
Remove products or workpieces from production equipment.
|
256 |
+
|
257 |
+
Mix ingredients to create specific finishes.
|
258 |
+
|
259 |
+
Design jewelry or decorative objects.
|
260 |
+
|
261 |
+
Adjust equipment to ensure optimal performance.
|
262 |
+
|
263 |
+
Package objects for shipping.'
|
264 |
+
- 'Firefighters - Request emergency personnel.
|
265 |
+
|
266 |
+
Attend training to learn new skills or update knowledge.
|
267 |
+
|
268 |
+
Inspect facilities to ensure compliance with fire regulations.
|
269 |
+
|
270 |
+
Train personnel on proper operational procedures.
|
271 |
+
|
272 |
+
Treat medical emergencies.
|
273 |
+
|
274 |
+
Operate firefighting equipment.
|
275 |
+
|
276 |
+
Relay information about incidents or emergencies to personnel using phones or
|
277 |
+
two-way radios.
|
278 |
+
|
279 |
+
Provide first aid or rescue assistance in emergencies.
|
280 |
+
|
281 |
+
Select tools, equipment, or technologies for use in operations or projects.
|
282 |
+
|
283 |
+
Rescue people from hazardous situations.
|
284 |
+
|
285 |
+
Maintain professional knowledge or certifications.
|
286 |
+
|
287 |
+
Examine debris to obtain information about causes of fires.
|
288 |
+
|
289 |
+
Communicate with other workers to coordinate activities.
|
290 |
+
|
291 |
+
Patrol natural areas to ensure safety or enforce regulations.
|
292 |
+
|
293 |
+
Prepare investigation or incident reports.
|
294 |
+
|
295 |
+
Assess characteristics of fires.
|
296 |
+
|
297 |
+
Prepare hoses or water supplies to fight fires.
|
298 |
+
|
299 |
+
Demonstrate activity techniques or equipment use.
|
300 |
+
|
301 |
+
Locate fires or fire danger areas.
|
302 |
+
|
303 |
+
Implement advanced life support techniques.
|
304 |
+
|
305 |
+
Educate the public about fire safety or prevention.
|
306 |
+
|
307 |
+
Participate in physical training to maintain fitness.
|
308 |
+
|
309 |
+
Collaborate with law enforcement or security agencies to respond to incidents.'
|
310 |
+
- 'Exchange information with colleagues.
|
311 |
+
|
312 |
+
Operate textile cutting or production equipment.
|
313 |
+
|
314 |
+
Load materials into production equipment.
|
315 |
+
|
316 |
+
Inspect textile products.
|
317 |
+
|
318 |
+
Record operational or production data.
|
319 |
+
|
320 |
+
Inspect production equipment.
|
321 |
+
|
322 |
+
Repair production equipment or tools.
|
323 |
+
|
324 |
+
Study blueprints or other instructions to determine equipment setup requirements.
|
325 |
+
|
326 |
+
Conduct test runs of production equipment.'
|
327 |
+
- source_sentence: 'Prepare assignments for teacher assistants or volunteers.
|
328 |
+
|
329 |
+
Instruct and monitor students in the use and care of equipment or materials to
|
330 |
+
prevent injuries and damage.
|
331 |
+
|
332 |
+
Develop or implement strategies to meet the needs of students with a variety of
|
333 |
+
disabilities.'
|
334 |
+
sentences:
|
335 |
+
- 'Resolve computer software problems.
|
336 |
+
|
337 |
+
Collaborate with others to develop or implement marketing strategies.
|
338 |
+
|
339 |
+
Develop diagrams or flow charts of system operation.
|
340 |
+
|
341 |
+
Update knowledge about emerging industry or technology trends.'
|
342 |
+
- 'Monitor student performance.
|
343 |
+
|
344 |
+
Prepare tests.
|
345 |
+
|
346 |
+
Discuss problems or issues with supervisors.
|
347 |
+
|
348 |
+
Teach others to use technology or equipment.
|
349 |
+
|
350 |
+
Distribute instructional or library materials.
|
351 |
+
|
352 |
+
Monitor student behavior, social development, or health.
|
353 |
+
|
354 |
+
Assist students with special educational needs.
|
355 |
+
|
356 |
+
Develop strategies or programs for students with special needs.
|
357 |
+
|
358 |
+
Plan educational activities.
|
359 |
+
|
360 |
+
Evaluate student work.
|
361 |
+
|
362 |
+
Maintain inventories of materials, equipment, or products.
|
363 |
+
|
364 |
+
Attend training sessions or professional meetings to develop or maintain professional
|
365 |
+
knowledge.
|
366 |
+
|
367 |
+
Tutor students who need extra assistance.
|
368 |
+
|
369 |
+
Direct activities of subordinates.
|
370 |
+
|
371 |
+
Teach life skills.
|
372 |
+
|
373 |
+
Establish rules or policies governing student behavior.
|
374 |
+
|
375 |
+
Collaborate with other teaching professionals to develop educational programs.
|
376 |
+
|
377 |
+
Display student work.
|
378 |
+
|
379 |
+
Administer tests to assess educational needs or progress.
|
380 |
+
|
381 |
+
Set up classroom materials or equipment.
|
382 |
+
|
383 |
+
Develop instructional objectives.
|
384 |
+
|
385 |
+
Plan experiential learning activities.
|
386 |
+
|
387 |
+
Maintain student records.
|
388 |
+
|
389 |
+
Develop instructional materials.
|
390 |
+
|
391 |
+
Modify teaching methods or materials to accommodate student needs.'
|
392 |
+
- 'Prepare research reports.
|
393 |
+
|
394 |
+
Negotiate agreements to resolve disputes.
|
395 |
+
|
396 |
+
Coordinate regulatory documentation activities.
|
397 |
+
|
398 |
+
Interview witnesses, suspects, or claimants.
|
399 |
+
|
400 |
+
Advise others on human resources topics.
|
401 |
+
|
402 |
+
Confer with personnel to coordinate business operations.'
|
403 |
+
- source_sentence: 'Determine shipping methods, routes, or rates for materials to
|
404 |
+
be shipped.
|
405 |
+
|
406 |
+
Prepare documents, such as work orders, bills of lading, or shipping orders, to
|
407 |
+
route materials.
|
408 |
+
|
409 |
+
Pack, seal, label, or affix postage to prepare materials for shipping, using hand
|
410 |
+
tools, power tools, or postage meter.
|
411 |
+
|
412 |
+
Requisition and store shipping materials and supplies to maintain inventory of
|
413 |
+
stock.
|
414 |
+
|
415 |
+
Confer or correspond with establishment representatives to rectify problems, such
|
416 |
+
as damages, shortages, or nonconformance to specifications.'
|
417 |
+
sentences:
|
418 |
+
- 'Shipping, Receiving, and Inventory Clerks - Analyze shipping information to make
|
419 |
+
routing decisions.
|
420 |
+
|
421 |
+
Record shipping information.
|
422 |
+
|
423 |
+
Coordinate shipping activities with external parties.'
|
424 |
+
- 'Police and Sheriff''s Patrol Officers - Respond to emergencies to provide assistance.
|
425 |
+
|
426 |
+
Maintain surveillance of individuals or establishments.
|
427 |
+
|
428 |
+
Investigate legal issues.
|
429 |
+
|
430 |
+
Investigate illegal or suspicious activities.
|
431 |
+
|
432 |
+
Direct law enforcement activities.
|
433 |
+
|
434 |
+
Monitor access or flow of people to prevent problems.
|
435 |
+
|
436 |
+
Interview people to obtain information about actions or status of individuals.
|
437 |
+
|
438 |
+
Investigate accidents to determine causes.
|
439 |
+
|
440 |
+
Patrol properties to maintain safety.
|
441 |
+
|
442 |
+
Communicate situation details to appropriate personnel.
|
443 |
+
|
444 |
+
Recommend improvements to increase safety or reduce risks.
|
445 |
+
|
446 |
+
Detain suspects or witnesses.
|
447 |
+
|
448 |
+
Inform the public about policies, services or procedures.
|
449 |
+
|
450 |
+
Relay information about incidents or emergencies to personnel using phones or
|
451 |
+
two-way radios.
|
452 |
+
|
453 |
+
Testify at legal or legislative proceedings.
|
454 |
+
|
455 |
+
Serve court ordered documents.
|
456 |
+
|
457 |
+
Communicate health and wellness information to the public.
|
458 |
+
|
459 |
+
Maintain operational records.
|
460 |
+
|
461 |
+
Interview people to gather information about criminal activities.
|
462 |
+
|
463 |
+
Apprehend criminal suspects.
|
464 |
+
|
465 |
+
Locate suspicious objects or vehicles.'
|
466 |
+
- 'Office Machine Operators, Except Computer - Monitor equipment operation to ensure
|
467 |
+
proper functioning.
|
468 |
+
|
469 |
+
Read work orders to determine material or setup requirements.
|
470 |
+
|
471 |
+
Sort materials or products.
|
472 |
+
|
473 |
+
Report maintenance or equipment problems to appropriate personnel.
|
474 |
+
|
475 |
+
Order materials, supplies, or equipment.
|
476 |
+
|
477 |
+
Compile data or documentation.
|
478 |
+
|
479 |
+
Operate office equipment.
|
480 |
+
|
481 |
+
Provide information to coworkers.
|
482 |
+
|
483 |
+
Deliver items.
|
484 |
+
|
485 |
+
Clean facilities or equipment.
|
486 |
+
|
487 |
+
Maintain office equipment in proper operating condition.
|
488 |
+
|
489 |
+
Attach identification information to products, items or containers.'
|
490 |
+
---
|
491 |
+
|
492 |
+
# SentenceTransformer based on mixedbread-ai/mxbai-embed-large-v1
|
493 |
+
|
494 |
+
This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [mixedbread-ai/mxbai-embed-large-v1](https://huggingface.co/mixedbread-ai/mxbai-embed-large-v1). It maps sentences & paragraphs to a 1024-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
|
495 |
+
|
496 |
+
## Model Details
|
497 |
+
|
498 |
+
### Model Description
|
499 |
+
- **Model Type:** Sentence Transformer
|
500 |
+
- **Base model:** [mixedbread-ai/mxbai-embed-large-v1](https://huggingface.co/mixedbread-ai/mxbai-embed-large-v1) <!-- at revision 526dc52cb738085d87002bf00ca4d3d99fd0029b -->
|
501 |
+
- **Maximum Sequence Length:** 128 tokens
|
502 |
+
- **Output Dimensionality:** 1024 dimensions
|
503 |
+
- **Similarity Function:** Cosine Similarity
|
504 |
+
<!-- - **Training Dataset:** Unknown -->
|
505 |
+
<!-- - **Language:** Unknown -->
|
506 |
+
<!-- - **License:** Unknown -->
|
507 |
+
|
508 |
+
### Model Sources
|
509 |
+
|
510 |
+
- **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
|
511 |
+
- **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
|
512 |
+
- **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
|
513 |
+
|
514 |
+
### Full Model Architecture
|
515 |
+
|
516 |
+
```
|
517 |
+
SentenceTransformer(
|
518 |
+
(0): Transformer({'max_seq_length': 128, 'do_lower_case': False}) with Transformer model: BertModel
|
519 |
+
(1): Pooling({'word_embedding_dimension': 1024, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
|
520 |
+
)
|
521 |
+
```
|
522 |
+
|
523 |
+
## Usage
|
524 |
+
|
525 |
+
### Direct Usage (Sentence Transformers)
|
526 |
+
|
527 |
+
First install the Sentence Transformers library:
|
528 |
+
|
529 |
+
```bash
|
530 |
+
pip install -U sentence-transformers
|
531 |
+
```
|
532 |
+
|
533 |
+
Then you can load this model and run inference.
|
534 |
+
```python
|
535 |
+
from sentence_transformers import SentenceTransformer
|
536 |
+
|
537 |
+
# Download from the 🤗 Hub
|
538 |
+
model = SentenceTransformer("Daxtra/onet_sbert-v2")
|
539 |
+
# Run inference
|
540 |
+
sentences = [
|
541 |
+
'Determine shipping methods, routes, or rates for materials to be shipped.\nPrepare documents, such as work orders, bills of lading, or shipping orders, to route materials.\nPack, seal, label, or affix postage to prepare materials for shipping, using hand tools, power tools, or postage meter.\nRequisition and store shipping materials and supplies to maintain inventory of stock.\nConfer or correspond with establishment representatives to rectify problems, such as damages, shortages, or nonconformance to specifications.',
|
542 |
+
'Shipping, Receiving, and Inventory Clerks - Analyze shipping information to make routing decisions.\nRecord shipping information.\nCoordinate shipping activities with external parties.',
|
543 |
+
'Office Machine Operators, Except Computer - Monitor equipment operation to ensure proper functioning.\nRead work orders to determine material or setup requirements.\nSort materials or products.\nReport maintenance or equipment problems to appropriate personnel.\nOrder materials, supplies, or equipment.\nCompile data or documentation.\nOperate office equipment.\nProvide information to coworkers.\nDeliver items.\nClean facilities or equipment.\nMaintain office equipment in proper operating condition.\nAttach identification information to products, items or containers.',
|
544 |
+
]
|
545 |
+
embeddings = model.encode(sentences)
|
546 |
+
print(embeddings.shape)
|
547 |
+
# [3, 1024]
|
548 |
+
|
549 |
+
# Get the similarity scores for the embeddings
|
550 |
+
similarities = model.similarity(embeddings, embeddings)
|
551 |
+
print(similarities.shape)
|
552 |
+
# [3, 3]
|
553 |
+
```
|
554 |
+
|
555 |
+
<!--
|
556 |
+
### Direct Usage (Transformers)
|
557 |
+
|
558 |
+
<details><summary>Click to see the direct usage in Transformers</summary>
|
559 |
+
|
560 |
+
</details>
|
561 |
+
-->
|
562 |
+
|
563 |
+
<!--
|
564 |
+
### Downstream Usage (Sentence Transformers)
|
565 |
+
|
566 |
+
You can finetune this model on your own dataset.
|
567 |
+
|
568 |
+
<details><summary>Click to expand</summary>
|
569 |
+
|
570 |
+
</details>
|
571 |
+
-->
|
572 |
+
|
573 |
+
<!--
|
574 |
+
### Out-of-Scope Use
|
575 |
+
|
576 |
+
*List how the model may foreseeably be misused and address what users ought not to do with the model.*
|
577 |
+
-->
|
578 |
+
|
579 |
+
<!--
|
580 |
+
## Bias, Risks and Limitations
|
581 |
+
|
582 |
+
*What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
|
583 |
+
-->
|
584 |
+
|
585 |
+
<!--
|
586 |
+
### Recommendations
|
587 |
+
|
588 |
+
*What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
|
589 |
+
-->
|
590 |
+
|
591 |
+
## Training Details
|
592 |
+
|
593 |
+
### Training Dataset
|
594 |
+
|
595 |
+
#### Unnamed Dataset
|
596 |
+
|
597 |
+
|
598 |
+
* Size: 5,856 training samples
|
599 |
+
* Columns: <code>sentence_0</code> and <code>sentence_1</code>
|
600 |
+
* Approximate statistics based on the first 1000 samples:
|
601 |
+
| | sentence_0 | sentence_1 |
|
602 |
+
|:--------|:------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------|
|
603 |
+
| type | string | string |
|
604 |
+
| details | <ul><li>min: 25 tokens</li><li>mean: 88.28 tokens</li><li>max: 128 tokens</li></ul> | <ul><li>min: 17 tokens</li><li>mean: 82.52 tokens</li><li>max: 128 tokens</li></ul> |
|
605 |
+
* Samples:
|
606 |
+
| sentence_0 | sentence_1 |
|
607 |
+
|:-----------|:-----------|
|
608 |
+
| <code>Compile information about new accounts, enter account information into computers, and file related forms or other documents.<br>Obtain credit records from reporting agencies.<br>Perform teller duties as required.<br>Duplicate records for distribution to branch offices.<br>Execute wire transfers of funds.<br>Collect and record customer deposits and fees and issue receipts, using computers.<br>Inform customers of procedures for applying for services, such as ATM cards, direct deposit of checks, and certificates of deposit.</code> | <code>New Accounts Clerks - Refer customers to appropriate personnel.<br>Compile data or documentation.<br>Operate office equipment.<br>Obtain personal or financial information about customers or applicants.<br>Discuss goods or services information with customers or patrons.<br>Distribute materials to employees or customers.<br>Schedule appointments.<br>Type documents.<br>Sell products or services.</code> |
|
609 |
+
| <code>Midwives - Establish and follow emergency or contingency plans for mothers and newborns.<br>Inform patients of how to prepare and supply birth sites.<br>Assess birthing environments to ensure cleanliness, safety, and the availability of appropriate supplies.<br>Perform annual gynecologic exams, including pap smears and breast exams.<br>Provide, or refer patients to other providers for, education or counseling on topics such as genetic testing, newborn care, contraception, or breastfeeding.</code> | <code>Midwives - Assess physical conditions of patients to aid in diagnosis or treatment.<br>Measure the physical or physiological attributes of patients.<br>Communicate detailed medical information to patients or family members.<br>Collect biological specimens from patients.<br>Care for women during pregnancy and childbirth.<br>Evaluate patient functioning, capabilities, or health.<br>Record patient medical histories.<br>Refer patients to other healthcare practitioners or health resources.<br>Collect medical information from patients, family members, or other medical professionals.<br>Analyze quantitative data to determine effectiveness of treatments or therapies.<br>Treat patients using alternative medical procedures.<br>Diagnose medical conditions.<br>Develop medical treatment plans.<br>Position patients for treatment or examination.<br>Provide health and wellness advice to patients, program participants, or caregivers.<br>Examine patients to assess general physical condition.<br>Analyze patient data to determine patient needs or treatment goals.<br>Conduct research to increase knowledge about medical issues.<br>Monitor patient conditions during treatments, procedures, or activities.<br>Prepare medical supplies or equipment for use.<br>Prepare official health documents or records.<br>Analyze test data or images to inform diagnosis or treatment.<br>Communicate health and wellness information to the public.<br>Treat medical emergencies.<br>Test biological specimens to gather information about patient conditions.</code> |
|
610 |
+
| <code>Plan, prepare, and develop various teaching aids, such as bibliographies, charts, and graphs.<br>Clean classrooms.<br>Operate and maintain audio-visual equipment.</code> | <code>Distribute instructional or library materials.<br>Clean facilities or work areas.<br>Supervise school or student activities.<br>Collaborate with other teaching professionals to develop educational programs.<br>Serve on institutional or departmental committees.<br>Create technology-based learning materials.<br>Maintain clean work areas.<br>Evaluate student work.<br>Plan educational activities.<br>Set up classroom materials or equipment.<br>Lead classes or community events.<br>Develop instructional materials.</code> |
|
611 |
+
* Loss: [<code>MultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters:
|
612 |
+
```json
|
613 |
+
{
|
614 |
+
"scale": 20.0,
|
615 |
+
"similarity_fct": "cos_sim"
|
616 |
+
}
|
617 |
+
```
|
618 |
+
|
619 |
+
### Training Hyperparameters
|
620 |
+
#### Non-Default Hyperparameters
|
621 |
+
|
622 |
+
- `eval_strategy`: steps
|
623 |
+
- `per_device_train_batch_size`: 24
|
624 |
+
- `per_device_eval_batch_size`: 24
|
625 |
+
- `num_train_epochs`: 1
|
626 |
+
- `multi_dataset_batch_sampler`: round_robin
|
627 |
+
|
628 |
+
#### All Hyperparameters
|
629 |
+
<details><summary>Click to expand</summary>
|
630 |
+
|
631 |
+
- `overwrite_output_dir`: False
|
632 |
+
- `do_predict`: False
|
633 |
+
- `eval_strategy`: steps
|
634 |
+
- `prediction_loss_only`: True
|
635 |
+
- `per_device_train_batch_size`: 24
|
636 |
+
- `per_device_eval_batch_size`: 24
|
637 |
+
- `per_gpu_train_batch_size`: None
|
638 |
+
- `per_gpu_eval_batch_size`: None
|
639 |
+
- `gradient_accumulation_steps`: 1
|
640 |
+
- `eval_accumulation_steps`: None
|
641 |
+
- `torch_empty_cache_steps`: None
|
642 |
+
- `learning_rate`: 5e-05
|
643 |
+
- `weight_decay`: 0.0
|
644 |
+
- `adam_beta1`: 0.9
|
645 |
+
- `adam_beta2`: 0.999
|
646 |
+
- `adam_epsilon`: 1e-08
|
647 |
+
- `max_grad_norm`: 1
|
648 |
+
- `num_train_epochs`: 1
|
649 |
+
- `max_steps`: -1
|
650 |
+
- `lr_scheduler_type`: linear
|
651 |
+
- `lr_scheduler_kwargs`: {}
|
652 |
+
- `warmup_ratio`: 0.0
|
653 |
+
- `warmup_steps`: 0
|
654 |
+
- `log_level`: passive
|
655 |
+
- `log_level_replica`: warning
|
656 |
+
- `log_on_each_node`: True
|
657 |
+
- `logging_nan_inf_filter`: True
|
658 |
+
- `save_safetensors`: True
|
659 |
+
- `save_on_each_node`: False
|
660 |
+
- `save_only_model`: False
|
661 |
+
- `restore_callback_states_from_checkpoint`: False
|
662 |
+
- `no_cuda`: False
|
663 |
+
- `use_cpu`: False
|
664 |
+
- `use_mps_device`: False
|
665 |
+
- `seed`: 42
|
666 |
+
- `data_seed`: None
|
667 |
+
- `jit_mode_eval`: False
|
668 |
+
- `use_ipex`: False
|
669 |
+
- `bf16`: False
|
670 |
+
- `fp16`: False
|
671 |
+
- `fp16_opt_level`: O1
|
672 |
+
- `half_precision_backend`: auto
|
673 |
+
- `bf16_full_eval`: False
|
674 |
+
- `fp16_full_eval`: False
|
675 |
+
- `tf32`: None
|
676 |
+
- `local_rank`: 0
|
677 |
+
- `ddp_backend`: None
|
678 |
+
- `tpu_num_cores`: None
|
679 |
+
- `tpu_metrics_debug`: False
|
680 |
+
- `debug`: []
|
681 |
+
- `dataloader_drop_last`: False
|
682 |
+
- `dataloader_num_workers`: 0
|
683 |
+
- `dataloader_prefetch_factor`: None
|
684 |
+
- `past_index`: -1
|
685 |
+
- `disable_tqdm`: False
|
686 |
+
- `remove_unused_columns`: True
|
687 |
+
- `label_names`: None
|
688 |
+
- `load_best_model_at_end`: False
|
689 |
+
- `ignore_data_skip`: False
|
690 |
+
- `fsdp`: []
|
691 |
+
- `fsdp_min_num_params`: 0
|
692 |
+
- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
|
693 |
+
- `fsdp_transformer_layer_cls_to_wrap`: None
|
694 |
+
- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
|
695 |
+
- `deepspeed`: None
|
696 |
+
- `label_smoothing_factor`: 0.0
|
697 |
+
- `optim`: adamw_torch
|
698 |
+
- `optim_args`: None
|
699 |
+
- `adafactor`: False
|
700 |
+
- `group_by_length`: False
|
701 |
+
- `length_column_name`: length
|
702 |
+
- `ddp_find_unused_parameters`: None
|
703 |
+
- `ddp_bucket_cap_mb`: None
|
704 |
+
- `ddp_broadcast_buffers`: False
|
705 |
+
- `dataloader_pin_memory`: True
|
706 |
+
- `dataloader_persistent_workers`: False
|
707 |
+
- `skip_memory_metrics`: True
|
708 |
+
- `use_legacy_prediction_loop`: False
|
709 |
+
- `push_to_hub`: False
|
710 |
+
- `resume_from_checkpoint`: None
|
711 |
+
- `hub_model_id`: None
|
712 |
+
- `hub_strategy`: every_save
|
713 |
+
- `hub_private_repo`: False
|
714 |
+
- `hub_always_push`: False
|
715 |
+
- `gradient_checkpointing`: False
|
716 |
+
- `gradient_checkpointing_kwargs`: None
|
717 |
+
- `include_inputs_for_metrics`: False
|
718 |
+
- `eval_do_concat_batches`: True
|
719 |
+
- `fp16_backend`: auto
|
720 |
+
- `push_to_hub_model_id`: None
|
721 |
+
- `push_to_hub_organization`: None
|
722 |
+
- `mp_parameters`:
|
723 |
+
- `auto_find_batch_size`: False
|
724 |
+
- `full_determinism`: False
|
725 |
+
- `torchdynamo`: None
|
726 |
+
- `ray_scope`: last
|
727 |
+
- `ddp_timeout`: 1800
|
728 |
+
- `torch_compile`: False
|
729 |
+
- `torch_compile_backend`: None
|
730 |
+
- `torch_compile_mode`: None
|
731 |
+
- `dispatch_batches`: None
|
732 |
+
- `split_batches`: None
|
733 |
+
- `include_tokens_per_second`: False
|
734 |
+
- `include_num_input_tokens_seen`: False
|
735 |
+
- `neftune_noise_alpha`: None
|
736 |
+
- `optim_target_modules`: None
|
737 |
+
- `batch_eval_metrics`: False
|
738 |
+
- `eval_on_start`: False
|
739 |
+
- `eval_use_gather_object`: False
|
740 |
+
- `batch_sampler`: batch_sampler
|
741 |
+
- `multi_dataset_batch_sampler`: round_robin
|
742 |
+
|
743 |
+
</details>
|
744 |
+
|
745 |
+
### Training Logs
|
746 |
+
| Epoch | Step |
|
747 |
+
|:------:|:----:|
|
748 |
+
| 0.0984 | 24 |
|
749 |
+
| 0.1967 | 48 |
|
750 |
+
| 0.2951 | 72 |
|
751 |
+
| 0.3934 | 96 |
|
752 |
+
| 0.4918 | 120 |
|
753 |
+
| 0.5902 | 144 |
|
754 |
+
| 0.6885 | 168 |
|
755 |
+
| 0.7869 | 192 |
|
756 |
+
| 0.8852 | 216 |
|
757 |
+
| 0.9836 | 240 |
|
758 |
+
| 1.0 | 244 |
|
759 |
+
|
760 |
+
|
761 |
+
### Framework Versions
|
762 |
+
- Python: 3.10.12
|
763 |
+
- Sentence Transformers: 3.2.0
|
764 |
+
- Transformers: 4.44.2
|
765 |
+
- PyTorch: 2.4.1+cu121
|
766 |
+
- Accelerate: 0.34.2
|
767 |
+
- Datasets: 3.0.1
|
768 |
+
- Tokenizers: 0.19.1
|
769 |
+
|
770 |
+
## Citation
|
771 |
+
|
772 |
+
### BibTeX
|
773 |
+
|
774 |
+
#### Sentence Transformers
|
775 |
+
```bibtex
|
776 |
+
@inproceedings{reimers-2019-sentence-bert,
|
777 |
+
title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
|
778 |
+
author = "Reimers, Nils and Gurevych, Iryna",
|
779 |
+
booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
|
780 |
+
month = "11",
|
781 |
+
year = "2019",
|
782 |
+
publisher = "Association for Computational Linguistics",
|
783 |
+
url = "https://arxiv.org/abs/1908.10084",
|
784 |
+
}
|
785 |
+
```
|
786 |
+
|
787 |
+
#### MultipleNegativesRankingLoss
|
788 |
+
```bibtex
|
789 |
+
@misc{henderson2017efficient,
|
790 |
+
title={Efficient Natural Language Response Suggestion for Smart Reply},
|
791 |
+
author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil},
|
792 |
+
year={2017},
|
793 |
+
eprint={1705.00652},
|
794 |
+
archivePrefix={arXiv},
|
795 |
+
primaryClass={cs.CL}
|
796 |
+
}
|
797 |
+
```
|
798 |
+
|
799 |
+
<!--
|
800 |
+
## Glossary
|
801 |
+
|
802 |
+
*Clearly define terms in order to be accessible across audiences.*
|
803 |
+
-->
|
804 |
+
|
805 |
+
<!--
|
806 |
+
## Model Card Authors
|
807 |
+
|
808 |
+
*Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
|
809 |
+
-->
|
810 |
+
|
811 |
+
<!--
|
812 |
+
## Model Card Contact
|
813 |
+
|
814 |
+
*Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
|
815 |
+
-->
|
config.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "mixedbread-ai/mxbai-embed-large-v1",
|
3 |
+
"architectures": [
|
4 |
+
"BertModel"
|
5 |
+
],
|
6 |
+
"attention_probs_dropout_prob": 0.1,
|
7 |
+
"classifier_dropout": null,
|
8 |
+
"gradient_checkpointing": false,
|
9 |
+
"hidden_act": "gelu",
|
10 |
+
"hidden_dropout_prob": 0.1,
|
11 |
+
"hidden_size": 1024,
|
12 |
+
"initializer_range": 0.02,
|
13 |
+
"intermediate_size": 4096,
|
14 |
+
"layer_norm_eps": 1e-12,
|
15 |
+
"max_position_embeddings": 512,
|
16 |
+
"model_type": "bert",
|
17 |
+
"num_attention_heads": 16,
|
18 |
+
"num_hidden_layers": 24,
|
19 |
+
"pad_token_id": 0,
|
20 |
+
"position_embedding_type": "absolute",
|
21 |
+
"torch_dtype": "float32",
|
22 |
+
"transformers_version": "4.44.2",
|
23 |
+
"type_vocab_size": 2,
|
24 |
+
"use_cache": false,
|
25 |
+
"vocab_size": 30522
|
26 |
+
}
|
config_sentence_transformers.json
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"__version__": {
|
3 |
+
"sentence_transformers": "3.2.0",
|
4 |
+
"transformers": "4.44.2",
|
5 |
+
"pytorch": "2.4.1+cu121"
|
6 |
+
},
|
7 |
+
"prompts": {},
|
8 |
+
"default_prompt_name": null,
|
9 |
+
"similarity_fn_name": null
|
10 |
+
}
|
model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bae611998b5ef94be6341dd5e93fbc4d7e58c4ce308e57b605cb65a3abc45160
|
3 |
+
size 1340612432
|
modules.json
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[
|
2 |
+
{
|
3 |
+
"idx": 0,
|
4 |
+
"name": "0",
|
5 |
+
"path": "",
|
6 |
+
"type": "sentence_transformers.models.Transformer"
|
7 |
+
},
|
8 |
+
{
|
9 |
+
"idx": 1,
|
10 |
+
"name": "1",
|
11 |
+
"path": "1_Pooling",
|
12 |
+
"type": "sentence_transformers.models.Pooling"
|
13 |
+
}
|
14 |
+
]
|
sentence_bert_config.json
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"max_seq_length": 128,
|
3 |
+
"do_lower_case": false
|
4 |
+
}
|
special_tokens_map.json
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cls_token": {
|
3 |
+
"content": "[CLS]",
|
4 |
+
"lstrip": false,
|
5 |
+
"normalized": false,
|
6 |
+
"rstrip": false,
|
7 |
+
"single_word": false
|
8 |
+
},
|
9 |
+
"mask_token": {
|
10 |
+
"content": "[MASK]",
|
11 |
+
"lstrip": false,
|
12 |
+
"normalized": false,
|
13 |
+
"rstrip": false,
|
14 |
+
"single_word": false
|
15 |
+
},
|
16 |
+
"pad_token": {
|
17 |
+
"content": "[PAD]",
|
18 |
+
"lstrip": false,
|
19 |
+
"normalized": false,
|
20 |
+
"rstrip": false,
|
21 |
+
"single_word": false
|
22 |
+
},
|
23 |
+
"sep_token": {
|
24 |
+
"content": "[SEP]",
|
25 |
+
"lstrip": false,
|
26 |
+
"normalized": false,
|
27 |
+
"rstrip": false,
|
28 |
+
"single_word": false
|
29 |
+
},
|
30 |
+
"unk_token": {
|
31 |
+
"content": "[UNK]",
|
32 |
+
"lstrip": false,
|
33 |
+
"normalized": false,
|
34 |
+
"rstrip": false,
|
35 |
+
"single_word": false
|
36 |
+
}
|
37 |
+
}
|
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
tokenizer_config.json
ADDED
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"added_tokens_decoder": {
|
3 |
+
"0": {
|
4 |
+
"content": "[PAD]",
|
5 |
+
"lstrip": false,
|
6 |
+
"normalized": false,
|
7 |
+
"rstrip": false,
|
8 |
+
"single_word": false,
|
9 |
+
"special": true
|
10 |
+
},
|
11 |
+
"100": {
|
12 |
+
"content": "[UNK]",
|
13 |
+
"lstrip": false,
|
14 |
+
"normalized": false,
|
15 |
+
"rstrip": false,
|
16 |
+
"single_word": false,
|
17 |
+
"special": true
|
18 |
+
},
|
19 |
+
"101": {
|
20 |
+
"content": "[CLS]",
|
21 |
+
"lstrip": false,
|
22 |
+
"normalized": false,
|
23 |
+
"rstrip": false,
|
24 |
+
"single_word": false,
|
25 |
+
"special": true
|
26 |
+
},
|
27 |
+
"102": {
|
28 |
+
"content": "[SEP]",
|
29 |
+
"lstrip": false,
|
30 |
+
"normalized": false,
|
31 |
+
"rstrip": false,
|
32 |
+
"single_word": false,
|
33 |
+
"special": true
|
34 |
+
},
|
35 |
+
"103": {
|
36 |
+
"content": "[MASK]",
|
37 |
+
"lstrip": false,
|
38 |
+
"normalized": false,
|
39 |
+
"rstrip": false,
|
40 |
+
"single_word": false,
|
41 |
+
"special": true
|
42 |
+
}
|
43 |
+
},
|
44 |
+
"clean_up_tokenization_spaces": true,
|
45 |
+
"cls_token": "[CLS]",
|
46 |
+
"do_basic_tokenize": true,
|
47 |
+
"do_lower_case": true,
|
48 |
+
"mask_token": "[MASK]",
|
49 |
+
"model_max_length": 128,
|
50 |
+
"never_split": null,
|
51 |
+
"pad_token": "[PAD]",
|
52 |
+
"sep_token": "[SEP]",
|
53 |
+
"strip_accents": null,
|
54 |
+
"tokenize_chinese_chars": true,
|
55 |
+
"tokenizer_class": "BertTokenizer",
|
56 |
+
"unk_token": "[UNK]"
|
57 |
+
}
|
vocab.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|