Add new SentenceTransformer model
Browse files
- 1_Pooling/config.json +10 -0
- README.md +647 -0
- config.json +26 -0
- config_sentence_transformers.json +10 -0
- model.safetensors +3 -0
- modules.json +14 -0
- sentence_bert_config.json +4 -0
- special_tokens_map.json +37 -0
- tokenizer.json +0 -0
- tokenizer_config.json +57 -0
- vocab.txt +0 -0
1_Pooling/config.json
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"word_embedding_dimension": 1024,
|
3 |
+
"pooling_mode_cls_token": false,
|
4 |
+
"pooling_mode_mean_tokens": true,
|
5 |
+
"pooling_mode_max_tokens": false,
|
6 |
+
"pooling_mode_mean_sqrt_len_tokens": false,
|
7 |
+
"pooling_mode_weightedmean_tokens": false,
|
8 |
+
"pooling_mode_lasttoken": false,
|
9 |
+
"include_prompt": true
|
10 |
+
}
|
README.md
ADDED
@@ -0,0 +1,647 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
base_model: mixedbread-ai/mxbai-embed-large-v1
|
3 |
+
library_name: sentence-transformers
|
4 |
+
pipeline_tag: sentence-similarity
|
5 |
+
tags:
|
6 |
+
- sentence-transformers
|
7 |
+
- sentence-similarity
|
8 |
+
- feature-extraction
|
9 |
+
- generated_from_trainer
|
10 |
+
- dataset_size:112464
|
11 |
+
- loss:MultipleNegativesRankingLoss
|
12 |
+
widget:
|
13 |
+
- source_sentence: 'Stocked the different varieties of household goods and gardening
|
14 |
+
equipment
|
15 |
+
|
16 |
+
Gave in-depth product knowledge to customers who required detailed information
|
17 |
+
on particular products
|
18 |
+
|
19 |
+
Promoted a welcoming environment where customers and suppliers received great
|
20 |
+
service and strived to maintain the shop to exceptional standards'
|
21 |
+
sentences:
|
22 |
+
- 'Monitor and maximize retail budgets.
|
23 |
+
|
24 |
+
Training and maintaining the skills and well-being of current staff.
|
25 |
+
|
26 |
+
Interviewing and selectively hire the most qualified candidates.'
|
27 |
+
- 'Responsible for checking and unpacking stock deliveries, reporting any damages
|
28 |
+
or defects to the couriers and suppliers, correctly pricing items and displaying
|
29 |
+
goods encouraging customers to make purchases
|
30 |
+
|
31 |
+
Presented products neatly ensuring the shop floor was tidy at all times, keeping
|
32 |
+
the area safe of hazards
|
33 |
+
|
34 |
+
Dealt with customer enquiries and complaints face-to-face, resolving any issues
|
35 |
+
quickly and efficiently and rarely had to escalate them to management to resolve'
|
36 |
+
- 'Operating their EPOS computer system.
|
37 |
+
|
38 |
+
Working well on my own and as part as a team.
|
39 |
+
|
40 |
+
Handling all incoming and outgoing phone queries.
|
41 |
+
|
42 |
+
Dealing with all incoming and outgoing emails from other branches and head office.'
|
43 |
+
- source_sentence: 'Advising on best practices.
|
44 |
+
|
45 |
+
Installing and maintaining Windows and Linux network systems.
|
46 |
+
|
47 |
+
Installing, uninstalling, troubleshooting specific Software for hospital based
|
48 |
+
equipment.'
|
49 |
+
sentences:
|
50 |
+
- 'Hardware and software installation and desktop support.
|
51 |
+
|
52 |
+
Solving I.T. issues for hospital staff.
|
53 |
+
|
54 |
+
Backing up Data Systems, setting RAID configurations, wiring, setting up network
|
55 |
+
and proxy servers. Fixing and troubleshooting desktop computers and laptops.'
|
56 |
+
- 'Preparing and maintaining all aspects of paperwork from accounts to ordering
|
57 |
+
of stock, selling of animals and paying of suppliers
|
58 |
+
|
59 |
+
All general roles associated with the day to day running of a farm
|
60 |
+
|
61 |
+
Liaising with Agri reps in adhering to Health and Safety Standards'
|
62 |
+
- 'Working to deadlines daily
|
63 |
+
|
64 |
+
Maintaining customer accounts
|
65 |
+
|
66 |
+
Managed 4 Reps accounts after 6months'
|
67 |
+
- source_sentence: 'Maintained good time keeping and delivered within the required
|
68 |
+
journey time limit
|
69 |
+
|
70 |
+
Maintained accurate and clear paperwork and delivery records
|
71 |
+
|
72 |
+
Developed self-discipline and good organisational skills, using initiative and
|
73 |
+
able to solve problems effectively as they arose
|
74 |
+
|
75 |
+
Developed a good geographical knowledge with the ability to read maps and plan
|
76 |
+
routes
|
77 |
+
|
78 |
+
Followed strict Road Traffic laws with regards to speed and weight limits
|
79 |
+
|
80 |
+
Complied with strict Health and Safety and Welfare procedures, policies and standards
|
81 |
+
along with statutory
|
82 |
+
|
83 |
+
Coordinated schedule of pick up points and delivery addresses and planned the
|
84 |
+
most efficient route, sorting packages into order of dropping off points'
|
85 |
+
sentences:
|
86 |
+
- 'Purchasing of all supplies, equipment, furniture and services in accordance with
|
87 |
+
NBK procedures. To assist with purchasing and design of stationary/printing. Arrange
|
88 |
+
quotations, proofs and printing of approved changes as necessary. Produce purchase
|
89 |
+
orders.
|
90 |
+
|
91 |
+
General ad hoc tasks.
|
92 |
+
|
93 |
+
Research new, potential suppliers that are reliable and deliver a high standard
|
94 |
+
of service, whilst considering the importance of cost savings.
|
95 |
+
|
96 |
+
Answering switchboard in a polite and efficient manner and transferring calls
|
97 |
+
to the relevant colleague.
|
98 |
+
|
99 |
+
Maintain asset register including monthly depreciation, new assets, write offs,
|
100 |
+
asset tagging, auditing, related accounting etc (fixed assets). Scanning and barcoding
|
101 |
+
new assets for our register software. Making sure the necessary information is
|
102 |
+
recorded - cost centre, product details, location.'
|
103 |
+
- 'Co-operated with despatch and receiving staff, assisting with the storage and
|
104 |
+
removal of packages
|
105 |
+
|
106 |
+
Maintained physical fitness levels and the ability to work in a stressful environment
|
107 |
+
|
108 |
+
Collected and delivered home shopping products safely to customers in the shortest
|
109 |
+
available time
|
110 |
+
|
111 |
+
Meticulously inspected all lights, brakes, fuel and tyres were in good working
|
112 |
+
condition before and during journeys, which required good attention to detail
|
113 |
+
|
114 |
+
Sought the quickest route to the delivery addresses and ensured all packages were
|
115 |
+
correctly signed for'
|
116 |
+
- 'Responsible for Quality Assurance of Projects according to QMS (Quality Management
|
117 |
+
System)
|
118 |
+
|
119 |
+
Coordinating Verification & Validation activities and resources
|
120 |
+
|
121 |
+
Producing HW and Mechanical Verification & Validation Reports
|
122 |
+
|
123 |
+
Supporting System, SW, Electronic, Mechanical Requirements definitions
|
124 |
+
|
125 |
+
Ensuring traceability among HW and Mechanical requirements, Design Documents,
|
126 |
+
test procedures and test report, using relevant configuration management tools
|
127 |
+
and producing relevant Traceability matrix document
|
128 |
+
|
129 |
+
Supporting Regulatory submissions, interfacing with the notified body for IEC
|
130 |
+
62304 compliancy
|
131 |
+
|
132 |
+
Defining Test Plan and verification strategies
|
133 |
+
|
134 |
+
Overseeing SW Unit tests: Formal Code Inspection Reviews, automated test execution
|
135 |
+
in simulated SW in the loop test environment
|
136 |
+
|
137 |
+
Defining and performing System Test Procedures and manual /automated test sequences
|
138 |
+
to be executed in real or simulated environments (Black Box test)
|
139 |
+
|
140 |
+
Supporting Device Design transfer to Manufacturing, by writing Operative Instructions,
|
141 |
+
Procedures, Production requirements, etc.
|
142 |
+
|
143 |
+
Verification and Validations activities:
|
144 |
+
|
145 |
+
Requirements Change Management
|
146 |
+
|
147 |
+
Repeating relevant tests affected by the change, according to the regression analysis
|
148 |
+
|
149 |
+
Supporting identification of Functional, Usability, Risk Control Measures Requirements
|
150 |
+
|
151 |
+
HW/SW Development processes: Team and Resources Coordination, Monitoring/Tracking
|
152 |
+
Activities Status, Scheduling and coordination of Technical meetings, Project
|
153 |
+
Reviews meetings and Status Review Meetings. Preparing and archiving Project revision,
|
154 |
+
verification and validation reports. Supporting or carrying out Risk Analysis,
|
155 |
+
System Engineering, Software and Mechanical Design and Development activities,
|
156 |
+
thanks to a strong technical background and skills.
|
157 |
+
|
158 |
+
Risk Management
|
159 |
+
|
160 |
+
Suppliers Management: Allocating and transferring technical and quality requirements
|
161 |
+
to strategic suppliers. Monitoring strategic and critical outsourcing activities.
|
162 |
+
Seeking reliable or strategic Suppliers. Supporting the purchasing manager in
|
163 |
+
negotiating prices and delivery time, reviewing technical datasheet or specifications,
|
164 |
+
requesting quotation, determining quantity and schedule of deliveries
|
165 |
+
|
166 |
+
Ensuring traceability among System and SW requirements, SW Design Documents, SW
|
167 |
+
Unit Verification, System test cases and Test Results, using relevant configuration
|
168 |
+
management tools and producing SW Traceability matrix document
|
169 |
+
|
170 |
+
Starting Processes: Scope Definition and Project Charter
|
171 |
+
|
172 |
+
Supporting System, SW, Electronic, Mechanical Design Reviews
|
173 |
+
|
174 |
+
Supporting Test environment and Testing Tool Requirements definition
|
175 |
+
|
176 |
+
Regulatory and Product Certification: Interaction with Notified Body (es. IMQ,
|
177 |
+
TUV) for CE/CB certification, according to 93/42 directive and EN 60601 standard
|
178 |
+
Series. Interaction with Medical Testing labs during safety, EMC, acoustic, etc.
|
179 |
+
certification compliance test
|
180 |
+
|
181 |
+
Change Management and Configuration Control.'
|
182 |
+
- source_sentence: 'Maintained a high level of quality in each case that reviewed
|
183 |
+
the content
|
184 |
+
|
185 |
+
Assisted new joiners in shadowing process
|
186 |
+
|
187 |
+
Focused on analysing, labelling and discovering patterns of suspicious activity
|
188 |
+
- with minimal supervision
|
189 |
+
|
190 |
+
Balanced priorities of daily workflow tasks in line with client needs'
|
191 |
+
sentences:
|
192 |
+
- 'Meeting both new and existing client
|
193 |
+
|
194 |
+
Generating new business both in face to face meetings and over the phone.
|
195 |
+
|
196 |
+
Writing up sales reports, and activity reports.
|
197 |
+
|
198 |
+
Writing up concise, value-based sales proposals.
|
199 |
+
|
200 |
+
Replying to all customer enquiries in a timely and accurate manner.'
|
201 |
+
- 'Reinforced concrete construction, structural steel and MEP works
|
202 |
+
|
203 |
+
Eastern and Western Ticket Halls connected by tunnels and platforms
|
204 |
+
|
205 |
+
In depth exposure and management of commercial issues, programme, interface and
|
206 |
+
handover for the contract
|
207 |
+
|
208 |
+
Dealt closely with project management team
|
209 |
+
|
210 |
+
Enhanced multi-tasking skills
|
211 |
+
|
212 |
+
Rapid familiarisation with design drawings, specifications and standards
|
213 |
+
|
214 |
+
Completion of quality audits internally and on suppliers
|
215 |
+
|
216 |
+
Close liaison between contractor, subcontractors, design, commercial and client
|
217 |
+
at management levels
|
218 |
+
|
219 |
+
£ 250M
|
220 |
+
|
221 |
+
Management of quality issues throughout the construction process'
|
222 |
+
- 'trained the Machine how to process data with accuracy, with excellent quality
|
223 |
+
in artificial intelligence learning process
|
224 |
+
|
225 |
+
Escalated violations of client policies using internal tools
|
226 |
+
|
227 |
+
Visually navigated and reviewed images/video along with text-based content through
|
228 |
+
internally developed applications
|
229 |
+
|
230 |
+
Actively took part in different internal projects
|
231 |
+
|
232 |
+
Evaluated online social media and advertising content, making sure it is in line
|
233 |
+
with the client''s policy
|
234 |
+
|
235 |
+
Achieved weekly productivity deliverables as part of daily workflow'
|
236 |
+
- source_sentence: 'Responsible for lease negotiations and rent collections with the
|
237 |
+
aim of maximising yields for rented properties.
|
238 |
+
|
239 |
+
Dealing with freedom of information requests.
|
240 |
+
|
241 |
+
Dealing with title issues.
|
242 |
+
|
243 |
+
Manage a wide range of insolvency assignments such as Fixed & Floating charge
|
244 |
+
Receiverships, Members Voluntary Liquidations, Creditors Voluntary Liquidations
|
245 |
+
and Court Appointed Liquidations.
|
246 |
+
|
247 |
+
Development and roll out of disposal strategies for all properties under management.
|
248 |
+
|
249 |
+
Manage the build out of a number of ghost estates on behalf of NAMA. A current
|
250 |
+
project involves remediation works to 84 residential units in Co. Monaghan with
|
251 |
+
a value of EUR 10 Million.
|
252 |
+
|
253 |
+
Preparation of tenders for NAMA & other financial institutions.
|
254 |
+
|
255 |
+
Attending meeting with borrowers and financial institutions.
|
256 |
+
|
257 |
+
Coordinate with estate agent to ensure we are receiving maximum yields for rented
|
258 |
+
properties. I am responsible for the management of in excess of 200 rental properties
|
259 |
+
across various Receiverships under my remit.'
|
260 |
+
sentences:
|
261 |
+
- 'Preparation of budgets in order to manage cash flow throughout the various assignments.
|
262 |
+
|
263 |
+
Preparation and submission of tax and CRO returns.
|
264 |
+
|
265 |
+
Communicate with Solicitors, Estate Agents and Assets Managers on a daily basis
|
266 |
+
to ensure properties are brought to market and sold in a timely manner.
|
267 |
+
|
268 |
+
Drafting monthly/quarterly reports for case managers.
|
269 |
+
|
270 |
+
Reviewing tenders received and appointment of professional service firms.
|
271 |
+
|
272 |
+
Liaising with NAMA case managers and our internal tax department in order to determine
|
273 |
+
the most tax efficient manner to dispose of properties.'
|
274 |
+
- 'Retail businesses - high street shops
|
275 |
+
|
276 |
+
Trades - electricians, joiners, printers
|
277 |
+
|
278 |
+
Oil industry supply companies including, fabricators, machinists and designers
|
279 |
+
for offshore and onshore applications.
|
280 |
+
|
281 |
+
Supporting business owners to grow their businesses, by providing them with strategies
|
282 |
+
to develop effectiveness and improve profitability through all operational activities
|
283 |
+
including Purchasing, Stock Control, Production Planning, Sales, Marketing, Distribution
|
284 |
+
and Customer Services.
|
285 |
+
|
286 |
+
Worked with 26 different businesses in 2010/11 and achieved an average increase
|
287 |
+
in Gross Profit of 39% for those businesses plus additional benefits of business
|
288 |
+
efficiency and team effectiveness.'
|
289 |
+
- 'Realization of bedside rounds and teaching.
|
290 |
+
|
291 |
+
Program implementation and development which include: administrative and HR management;
|
292 |
+
conception and implementation of information system; Conception, implementation
|
293 |
+
and coordination of PMTCT program.
|
294 |
+
|
295 |
+
Monthly report of activities.
|
296 |
+
|
297 |
+
Planning and Supervision of mortality and morbidity review (MMR).
|
298 |
+
|
299 |
+
Responsible for communication with the pediatric Saint Damien Hospital and other
|
300 |
+
existing programs in the same hospital.
|
301 |
+
|
302 |
+
Note: This program run by NPFS/Saint Damien and funded by Francesca Rava foundation
|
303 |
+
at
|
304 |
+
|
305 |
+
Supervision of the staff (12 Obstetricians, 7 anesthetists, 16 nurse midwives,
|
306 |
+
6 auxiliary midwives, 1 administrative assistant , 1 data clerk etc.)
|
307 |
+
|
308 |
+
Performance of ultrasound
|
309 |
+
|
310 |
+
Clinical work according to day time schedule
|
311 |
+
|
312 |
+
Performance of surgical procedures'
|
313 |
+
---
|
314 |
+
|
315 |
+
# SentenceTransformer based on mixedbread-ai/mxbai-embed-large-v1
|
316 |
+
|
317 |
+
This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [mixedbread-ai/mxbai-embed-large-v1](https://huggingface.co/mixedbread-ai/mxbai-embed-large-v1). It maps sentences & paragraphs to a 1024-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
|
318 |
+
|
319 |
+
## Model Details
|
320 |
+
|
321 |
+
### Model Description
|
322 |
+
- **Model Type:** Sentence Transformer
|
323 |
+
- **Base model:** [mixedbread-ai/mxbai-embed-large-v1](https://huggingface.co/mixedbread-ai/mxbai-embed-large-v1) <!-- at revision 526dc52cb738085d87002bf00ca4d3d99fd0029b -->
|
324 |
+
- **Maximum Sequence Length:** 128 tokens
|
325 |
+
- **Output Dimensionality:** 1024 dimensions
|
326 |
+
- **Similarity Function:** Cosine Similarity
|
327 |
+
<!-- - **Training Dataset:** Unknown -->
|
328 |
+
<!-- - **Language:** Unknown -->
|
329 |
+
<!-- - **License:** Unknown -->
|
330 |
+
|
331 |
+
### Model Sources
|
332 |
+
|
333 |
+
- **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
|
334 |
+
- **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
|
335 |
+
- **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
|
336 |
+
|
337 |
+
### Full Model Architecture
|
338 |
+
|
339 |
+
```
|
340 |
+
SentenceTransformer(
|
341 |
+
(0): Transformer({'max_seq_length': 128, 'do_lower_case': False}) with Transformer model: BertModel
|
342 |
+
(1): Pooling({'word_embedding_dimension': 1024, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
|
343 |
+
)
|
344 |
+
```
|
345 |
+
|
346 |
+
## Usage
|
347 |
+
|
348 |
+
### Direct Usage (Sentence Transformers)
|
349 |
+
|
350 |
+
First install the Sentence Transformers library:
|
351 |
+
|
352 |
+
```bash
|
353 |
+
pip install -U sentence-transformers
|
354 |
+
```
|
355 |
+
|
356 |
+
Then you can load this model and run inference.
|
357 |
+
```python
|
358 |
+
from sentence_transformers import SentenceTransformer
|
359 |
+
|
360 |
+
# Download from the 🤗 Hub
|
361 |
+
model = SentenceTransformer("Daxtra/sbert-trained-on-whs")
|
362 |
+
# Run inference
|
363 |
+
sentences = [
|
364 |
+
'Responsible for lease negotiations and rent collections with the aim of maximising yields for rented properties.\nDealing with freedom of information requests.\nDealing with title issues.\nManage a wide range of insolvency assignments such as Fixed & Floating charge Receiverships, Members Voluntary Liquidations, Creditors Voluntary Liquidations and Court Appointed Liquidations.\nDevelopment and roll out of disposal strategies for all properties under management.\nManage the build out of a number of ghost estates on behalf of NAMA. A current project involves remediation works to 84 residential units in Co. Monaghan with a value of EUR 10 Million.\nPreparation of tenders for NAMA & other financial institutions.\nAttending meeting with borrowers and financial institutions.\nCoordinate with estate agent to ensure we are receiving maximum yields for rented properties. I am responsible for the management of in excess of 200 rental properties across various Receiverships under my remit.',
|
365 |
+
'Preparation of budgets in order to manage cash flow throughout the various assignments.\nPreparation and submission of tax and CRO returns.\nCommunicate with Solicitors, Estate Agents and Assets Managers on a daily basis to ensure properties are brought to market and sold in a timely manner.\nDrafting monthly/quarterly reports for case managers.\nReviewing tenders received and appointment of professional service firms.\nLiaising with NAMA case managers and our internal tax department in order to determine the most tax efficient manner to dispose of properties.',
|
366 |
+
'Realization of bedside rounds and teaching.\nProgram implementation and development which include: administrative and HR management; conception and implementation of information system; Conception, implementation and coordination of PMTCT program.\nMonthly report of activities.\nPlanning and Supervision of mortality and morbidity review (MMR).\nResponsible for communication with the pediatric Saint Damien Hospital and other existing programs in the same hospital.\nNote: This program run by NPFS/Saint Damien and funded by Francesca Rava foundation at\nSupervision of the staff (12 Obstetricians, 7 anesthetists, 16 nurse midwives, 6 auxiliary midwives, 1 administrative assistant , 1 data clerk etc.)\nPerformance of ultrasound\nClinical work according to day time schedule\nPerformance of surgical procedures',
|
367 |
+
]
|
368 |
+
embeddings = model.encode(sentences)
|
369 |
+
print(embeddings.shape)
|
370 |
+
# [3, 1024]
|
371 |
+
|
372 |
+
# Get the similarity scores for the embeddings
|
373 |
+
similarities = model.similarity(embeddings, embeddings)
|
374 |
+
print(similarities.shape)
|
375 |
+
# [3, 3]
|
376 |
+
```
|
377 |
+
|
378 |
+
<!--
|
379 |
+
### Direct Usage (Transformers)
|
380 |
+
|
381 |
+
<details><summary>Click to see the direct usage in Transformers</summary>
|
382 |
+
|
383 |
+
</details>
|
384 |
+
-->
|
385 |
+
|
386 |
+
<!--
|
387 |
+
### Downstream Usage (Sentence Transformers)
|
388 |
+
|
389 |
+
You can finetune this model on your own dataset.
|
390 |
+
|
391 |
+
<details><summary>Click to expand</summary>
|
392 |
+
|
393 |
+
</details>
|
394 |
+
-->
|
395 |
+
|
396 |
+
<!--
|
397 |
+
### Out-of-Scope Use
|
398 |
+
|
399 |
+
*List how the model may foreseeably be misused and address what users ought not to do with the model.*
|
400 |
+
-->
|
401 |
+
|
402 |
+
<!--
|
403 |
+
## Bias, Risks and Limitations
|
404 |
+
|
405 |
+
*What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
|
406 |
+
-->
|
407 |
+
|
408 |
+
<!--
|
409 |
+
### Recommendations
|
410 |
+
|
411 |
+
*What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
|
412 |
+
-->
|
413 |
+
|
414 |
+
## Training Details
|
415 |
+
|
416 |
+
### Training Dataset
|
417 |
+
|
418 |
+
#### Unnamed Dataset
|
419 |
+
|
420 |
+
|
421 |
+
* Size: 112,464 training samples
|
422 |
+
* Columns: <code>sentence_0</code> and <code>sentence_1</code>
|
423 |
+
* Approximate statistics based on the first 1000 samples:
|
424 |
+
| | sentence_0 | sentence_1 |
|
425 |
+
|:--------|:-----------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------|
|
426 |
+
| type | string | string |
|
427 |
+
| details | <ul><li>min: 8 tokens</li><li>mean: 64.94 tokens</li><li>max: 128 tokens</li></ul> | <ul><li>min: 6 tokens</li><li>mean: 64.91 tokens</li><li>max: 128 tokens</li></ul> |
|
428 |
+
* Samples:
|
429 |
+
| sentence_0 | sentence_1 |
|
430 |
+
|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
431 |
+
| <code>Co-authored standalone Moodle site on sustainability that was marketed by the college and sold to various 3rd parties, making in excess of £ 20,000.<br>Expert-level knowledge in eLearning and Virtual Learning Environments through daily use, including Moodle and Mahara with the requirement to produced tailored courses on the technology to a diverse range of teaching professionals;<br>Delivered in excess of 20 training sessions to over 600 lecturers on a range of new innovative practices in education including innovations within the VLE which supported their continued professional development and enhanced their classroom performance with a participant satisfaction rate regularly in excess of 95%;</code> | <code>Administered Moodle across college, supporting learner management and championing the use of core modules to support the tracking and assessment of in excess of 15000 learners.<br>Coordinated and managed multiple syllabuses and competency tests for advanced software development course, emphasising quality of resources to promote self-guided learning in addition to more traditional approaches, increasing intake by 400% over a 3 year period, achieving pass and completion rate to in excess of 97%;<br>Improved quality of eLearning resources through the delivery of training on the use of screen-recording and simulation software in content development, increasing the use of the Virtual Learning Environment from something that would serve as a repository of worksheets to a more interactive and engaging application, appearing as the top visited pages in weekly reports;<br>Promoted to the unique position of Head Judge in Web Design by the Government-backed National Apprenticeship Service, project managing and collaborating with a team of Expert Judges nationally setting up the timetabling and resourcing of events attended by in excess of 100 student competitors;</code> |
|
432 |
+
| <code>Advising on best practices.<br>Installing and maintaining Windows and Linux network systems.<br>Installing, uninstalling, troubleshooting specific Software for hospital based equipment.</code> | <code>Hardware and software installation and desktop support.<br>Solving I.T. issues for hospital staff.<br>Backing up Data Systems, setting RAID configurations, wiring, setting up network and proxy servers. Fixing and troubleshooting desktop computers and laptops.</code> |
|
433 |
+
| <code>Analysis of data from the manufacture of finished goods, distribution of materials in the production of finished products;<br>Preparation of cost of production calculations for finished products;<br>Full maintenance of accounting, tax, and management accounting in accordance with the current legislation of Ukraine.<br>Accounting for cash transactions;</code> | <code>Maintenance of personnel documents (orders, contracts, employment records);<br>Work with primary documents (billing, acts, account invoices, tax invoices, work in Client Bank and Privat 24, carrying out banking operations, preparation of acts of reconciliation, payroll, accounting of goods and materials);<br>Preparation and submission of financial, statistical, and tax reporting.</code> |
|
434 |
+
* Loss: [<code>MultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters:
|
435 |
+
```json
|
436 |
+
{
|
437 |
+
"scale": 20.0,
|
438 |
+
"similarity_fct": "cos_sim"
|
439 |
+
}
|
440 |
+
```
|
441 |
+
|
442 |
+
### Training Hyperparameters
|
443 |
+
#### Non-Default Hyperparameters
|
444 |
+
|
445 |
+
- `eval_strategy`: steps
|
446 |
+
- `per_device_train_batch_size`: 24
|
447 |
+
- `per_device_eval_batch_size`: 24
|
448 |
+
- `num_train_epochs`: 1
|
449 |
+
- `multi_dataset_batch_sampler`: round_robin
|
450 |
+
|
451 |
+
#### All Hyperparameters
|
452 |
+
<details><summary>Click to expand</summary>
|
453 |
+
|
454 |
+
- `overwrite_output_dir`: False
|
455 |
+
- `do_predict`: False
|
456 |
+
- `eval_strategy`: steps
|
457 |
+
- `prediction_loss_only`: True
|
458 |
+
- `per_device_train_batch_size`: 24
|
459 |
+
- `per_device_eval_batch_size`: 24
|
460 |
+
- `per_gpu_train_batch_size`: None
|
461 |
+
- `per_gpu_eval_batch_size`: None
|
462 |
+
- `gradient_accumulation_steps`: 1
|
463 |
+
- `eval_accumulation_steps`: None
|
464 |
+
- `torch_empty_cache_steps`: None
|
465 |
+
- `learning_rate`: 5e-05
|
466 |
+
- `weight_decay`: 0.0
|
467 |
+
- `adam_beta1`: 0.9
|
468 |
+
- `adam_beta2`: 0.999
|
469 |
+
- `adam_epsilon`: 1e-08
|
470 |
+
- `max_grad_norm`: 1
|
471 |
+
- `num_train_epochs`: 1
|
472 |
+
- `max_steps`: -1
|
473 |
+
- `lr_scheduler_type`: linear
|
474 |
+
- `lr_scheduler_kwargs`: {}
|
475 |
+
- `warmup_ratio`: 0.0
|
476 |
+
- `warmup_steps`: 0
|
477 |
+
- `log_level`: passive
|
478 |
+
- `log_level_replica`: warning
|
479 |
+
- `log_on_each_node`: True
|
480 |
+
- `logging_nan_inf_filter`: True
|
481 |
+
- `save_safetensors`: True
|
482 |
+
- `save_on_each_node`: False
|
483 |
+
- `save_only_model`: False
|
484 |
+
- `restore_callback_states_from_checkpoint`: False
|
485 |
+
- `no_cuda`: False
|
486 |
+
- `use_cpu`: False
|
487 |
+
- `use_mps_device`: False
|
488 |
+
- `seed`: 42
|
489 |
+
- `data_seed`: None
|
490 |
+
- `jit_mode_eval`: False
|
491 |
+
- `use_ipex`: False
|
492 |
+
- `bf16`: False
|
493 |
+
- `fp16`: False
|
494 |
+
- `fp16_opt_level`: O1
|
495 |
+
- `half_precision_backend`: auto
|
496 |
+
- `bf16_full_eval`: False
|
497 |
+
- `fp16_full_eval`: False
|
498 |
+
- `tf32`: None
|
499 |
+
- `local_rank`: 0
|
500 |
+
- `ddp_backend`: None
|
501 |
+
- `tpu_num_cores`: None
|
502 |
+
- `tpu_metrics_debug`: False
|
503 |
+
- `debug`: []
|
504 |
+
- `dataloader_drop_last`: False
|
505 |
+
- `dataloader_num_workers`: 0
|
506 |
+
- `dataloader_prefetch_factor`: None
|
507 |
+
- `past_index`: -1
|
508 |
+
- `disable_tqdm`: False
|
509 |
+
- `remove_unused_columns`: True
|
510 |
+
- `label_names`: None
|
511 |
+
- `load_best_model_at_end`: False
|
512 |
+
- `ignore_data_skip`: False
|
513 |
+
- `fsdp`: []
|
514 |
+
- `fsdp_min_num_params`: 0
|
515 |
+
- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
|
516 |
+
- `fsdp_transformer_layer_cls_to_wrap`: None
|
517 |
+
- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
|
518 |
+
- `deepspeed`: None
|
519 |
+
- `label_smoothing_factor`: 0.0
|
520 |
+
- `optim`: adamw_torch
|
521 |
+
- `optim_args`: None
|
522 |
+
- `adafactor`: False
|
523 |
+
- `group_by_length`: False
|
524 |
+
- `length_column_name`: length
|
525 |
+
- `ddp_find_unused_parameters`: None
|
526 |
+
- `ddp_bucket_cap_mb`: None
|
527 |
+
- `ddp_broadcast_buffers`: False
|
528 |
+
- `dataloader_pin_memory`: True
|
529 |
+
- `dataloader_persistent_workers`: False
|
530 |
+
- `skip_memory_metrics`: True
|
531 |
+
- `use_legacy_prediction_loop`: False
|
532 |
+
- `push_to_hub`: False
|
533 |
+
- `resume_from_checkpoint`: None
|
534 |
+
- `hub_model_id`: None
|
535 |
+
- `hub_strategy`: every_save
|
536 |
+
- `hub_private_repo`: False
|
537 |
+
- `hub_always_push`: False
|
538 |
+
- `gradient_checkpointing`: False
|
539 |
+
- `gradient_checkpointing_kwargs`: None
|
540 |
+
- `include_inputs_for_metrics`: False
|
541 |
+
- `eval_do_concat_batches`: True
|
542 |
+
- `fp16_backend`: auto
|
543 |
+
- `push_to_hub_model_id`: None
|
544 |
+
- `push_to_hub_organization`: None
|
545 |
+
- `mp_parameters`:
|
546 |
+
- `auto_find_batch_size`: False
|
547 |
+
- `full_determinism`: False
|
548 |
+
- `torchdynamo`: None
|
549 |
+
- `ray_scope`: last
|
550 |
+
- `ddp_timeout`: 1800
|
551 |
+
- `torch_compile`: False
|
552 |
+
- `torch_compile_backend`: None
|
553 |
+
- `torch_compile_mode`: None
|
554 |
+
- `dispatch_batches`: None
|
555 |
+
- `split_batches`: None
|
556 |
+
- `include_tokens_per_second`: False
|
557 |
+
- `include_num_input_tokens_seen`: False
|
558 |
+
- `neftune_noise_alpha`: None
|
559 |
+
- `optim_target_modules`: None
|
560 |
+
- `batch_eval_metrics`: False
|
561 |
+
- `eval_on_start`: False
|
562 |
+
- `eval_use_gather_object`: False
|
563 |
+
- `batch_sampler`: batch_sampler
|
564 |
+
- `multi_dataset_batch_sampler`: round_robin
|
565 |
+
|
566 |
+
</details>
|
567 |
+
|
568 |
+
### Training Logs
|
569 |
+
| Epoch | Step | Training Loss |
|
570 |
+
|:------:|:----:|:-------------:|
|
571 |
+
| 0.0999 | 468 | - |
|
572 |
+
| 0.1067 | 500 | 0.432 |
|
573 |
+
| 0.1997 | 936 | - |
|
574 |
+
| 0.2134 | 1000 | 0.2153 |
|
575 |
+
| 0.2996 | 1404 | - |
|
576 |
+
| 0.3201 | 1500 | 0.1997 |
|
577 |
+
| 0.3995 | 1872 | - |
|
578 |
+
| 0.4268 | 2000 | 0.1635 |
|
579 |
+
| 0.4994 | 2340 | - |
|
580 |
+
| 0.5335 | 2500 | 0.1573 |
|
581 |
+
| 0.5992 | 2808 | - |
|
582 |
+
| 0.6402 | 3000 | 0.1518 |
|
583 |
+
| 0.6991 | 3276 | - |
|
584 |
+
| 0.7469 | 3500 | 0.1359 |
|
585 |
+
| 0.7990 | 3744 | - |
|
586 |
+
| 0.8536 | 4000 | 0.1351 |
|
587 |
+
| 0.8988 | 4212 | - |
|
588 |
+
| 0.9603 | 4500 | 0.1187 |
|
589 |
+
| 0.9987 | 4680 | - |
|
590 |
+
| 1.0 | 4686 | - |
|
591 |
+
|
592 |
+
|
593 |
+
### Framework Versions
|
594 |
+
- Python: 3.10.12
|
595 |
+
- Sentence Transformers: 3.2.0
|
596 |
+
- Transformers: 4.44.2
|
597 |
+
- PyTorch: 2.4.1+cu121
|
598 |
+
- Accelerate: 0.34.2
|
599 |
+
- Datasets: 3.0.1
|
600 |
+
- Tokenizers: 0.19.1
|
601 |
+
|
602 |
+
## Citation
|
603 |
+
|
604 |
+
### BibTeX
|
605 |
+
|
606 |
+
#### Sentence Transformers
|
607 |
+
```bibtex
|
608 |
+
@inproceedings{reimers-2019-sentence-bert,
|
609 |
+
title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
|
610 |
+
author = "Reimers, Nils and Gurevych, Iryna",
|
611 |
+
booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
|
612 |
+
month = "11",
|
613 |
+
year = "2019",
|
614 |
+
publisher = "Association for Computational Linguistics",
|
615 |
+
url = "https://arxiv.org/abs/1908.10084",
|
616 |
+
}
|
617 |
+
```
|
618 |
+
|
619 |
+
#### MultipleNegativesRankingLoss
|
620 |
+
```bibtex
|
621 |
+
@misc{henderson2017efficient,
|
622 |
+
title={Efficient Natural Language Response Suggestion for Smart Reply},
|
623 |
+
author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil},
|
624 |
+
year={2017},
|
625 |
+
eprint={1705.00652},
|
626 |
+
archivePrefix={arXiv},
|
627 |
+
primaryClass={cs.CL}
|
628 |
+
}
|
629 |
+
```
|
630 |
+
|
631 |
+
<!--
|
632 |
+
## Glossary
|
633 |
+
|
634 |
+
*Clearly define terms in order to be accessible across audiences.*
|
635 |
+
-->
|
636 |
+
|
637 |
+
<!--
|
638 |
+
## Model Card Authors
|
639 |
+
|
640 |
+
*Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
|
641 |
+
-->
|
642 |
+
|
643 |
+
<!--
|
644 |
+
## Model Card Contact
|
645 |
+
|
646 |
+
*Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
|
647 |
+
-->
|
config.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "mixedbread-ai/mxbai-embed-large-v1",
|
3 |
+
"architectures": [
|
4 |
+
"BertModel"
|
5 |
+
],
|
6 |
+
"attention_probs_dropout_prob": 0.1,
|
7 |
+
"classifier_dropout": null,
|
8 |
+
"gradient_checkpointing": false,
|
9 |
+
"hidden_act": "gelu",
|
10 |
+
"hidden_dropout_prob": 0.1,
|
11 |
+
"hidden_size": 1024,
|
12 |
+
"initializer_range": 0.02,
|
13 |
+
"intermediate_size": 4096,
|
14 |
+
"layer_norm_eps": 1e-12,
|
15 |
+
"max_position_embeddings": 512,
|
16 |
+
"model_type": "bert",
|
17 |
+
"num_attention_heads": 16,
|
18 |
+
"num_hidden_layers": 24,
|
19 |
+
"pad_token_id": 0,
|
20 |
+
"position_embedding_type": "absolute",
|
21 |
+
"torch_dtype": "float32",
|
22 |
+
"transformers_version": "4.44.2",
|
23 |
+
"type_vocab_size": 2,
|
24 |
+
"use_cache": false,
|
25 |
+
"vocab_size": 30522
|
26 |
+
}
|
config_sentence_transformers.json
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"__version__": {
|
3 |
+
"sentence_transformers": "3.2.0",
|
4 |
+
"transformers": "4.44.2",
|
5 |
+
"pytorch": "2.4.1+cu121"
|
6 |
+
},
|
7 |
+
"prompts": {},
|
8 |
+
"default_prompt_name": null,
|
9 |
+
"similarity_fn_name": null
|
10 |
+
}
|
model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e9c321d6e7962feea85fe74145072d120d064d0e5bf34d43dacc6104c25876cf
|
3 |
+
size 1340612432
|
modules.json
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[
|
2 |
+
{
|
3 |
+
"idx": 0,
|
4 |
+
"name": "0",
|
5 |
+
"path": "",
|
6 |
+
"type": "sentence_transformers.models.Transformer"
|
7 |
+
},
|
8 |
+
{
|
9 |
+
"idx": 1,
|
10 |
+
"name": "1",
|
11 |
+
"path": "1_Pooling",
|
12 |
+
"type": "sentence_transformers.models.Pooling"
|
13 |
+
}
|
14 |
+
]
|
sentence_bert_config.json
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"max_seq_length": 128,
|
3 |
+
"do_lower_case": false
|
4 |
+
}
|
special_tokens_map.json
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cls_token": {
|
3 |
+
"content": "[CLS]",
|
4 |
+
"lstrip": false,
|
5 |
+
"normalized": false,
|
6 |
+
"rstrip": false,
|
7 |
+
"single_word": false
|
8 |
+
},
|
9 |
+
"mask_token": {
|
10 |
+
"content": "[MASK]",
|
11 |
+
"lstrip": false,
|
12 |
+
"normalized": false,
|
13 |
+
"rstrip": false,
|
14 |
+
"single_word": false
|
15 |
+
},
|
16 |
+
"pad_token": {
|
17 |
+
"content": "[PAD]",
|
18 |
+
"lstrip": false,
|
19 |
+
"normalized": false,
|
20 |
+
"rstrip": false,
|
21 |
+
"single_word": false
|
22 |
+
},
|
23 |
+
"sep_token": {
|
24 |
+
"content": "[SEP]",
|
25 |
+
"lstrip": false,
|
26 |
+
"normalized": false,
|
27 |
+
"rstrip": false,
|
28 |
+
"single_word": false
|
29 |
+
},
|
30 |
+
"unk_token": {
|
31 |
+
"content": "[UNK]",
|
32 |
+
"lstrip": false,
|
33 |
+
"normalized": false,
|
34 |
+
"rstrip": false,
|
35 |
+
"single_word": false
|
36 |
+
}
|
37 |
+
}
|
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
tokenizer_config.json
ADDED
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"added_tokens_decoder": {
|
3 |
+
"0": {
|
4 |
+
"content": "[PAD]",
|
5 |
+
"lstrip": false,
|
6 |
+
"normalized": false,
|
7 |
+
"rstrip": false,
|
8 |
+
"single_word": false,
|
9 |
+
"special": true
|
10 |
+
},
|
11 |
+
"100": {
|
12 |
+
"content": "[UNK]",
|
13 |
+
"lstrip": false,
|
14 |
+
"normalized": false,
|
15 |
+
"rstrip": false,
|
16 |
+
"single_word": false,
|
17 |
+
"special": true
|
18 |
+
},
|
19 |
+
"101": {
|
20 |
+
"content": "[CLS]",
|
21 |
+
"lstrip": false,
|
22 |
+
"normalized": false,
|
23 |
+
"rstrip": false,
|
24 |
+
"single_word": false,
|
25 |
+
"special": true
|
26 |
+
},
|
27 |
+
"102": {
|
28 |
+
"content": "[SEP]",
|
29 |
+
"lstrip": false,
|
30 |
+
"normalized": false,
|
31 |
+
"rstrip": false,
|
32 |
+
"single_word": false,
|
33 |
+
"special": true
|
34 |
+
},
|
35 |
+
"103": {
|
36 |
+
"content": "[MASK]",
|
37 |
+
"lstrip": false,
|
38 |
+
"normalized": false,
|
39 |
+
"rstrip": false,
|
40 |
+
"single_word": false,
|
41 |
+
"special": true
|
42 |
+
}
|
43 |
+
},
|
44 |
+
"clean_up_tokenization_spaces": true,
|
45 |
+
"cls_token": "[CLS]",
|
46 |
+
"do_basic_tokenize": true,
|
47 |
+
"do_lower_case": true,
|
48 |
+
"mask_token": "[MASK]",
|
49 |
+
"model_max_length": 128,
|
50 |
+
"never_split": null,
|
51 |
+
"pad_token": "[PAD]",
|
52 |
+
"sep_token": "[SEP]",
|
53 |
+
"strip_accents": null,
|
54 |
+
"tokenize_chinese_chars": true,
|
55 |
+
"tokenizer_class": "BertTokenizer",
|
56 |
+
"unk_token": "[UNK]"
|
57 |
+
}
|
vocab.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|