hamedrahimi
commited on
Add new SentenceTransformer model
Browse files- 1_Pooling/config.json +10 -0
- README.md +929 -0
- config.json +46 -0
- config_sentence_transformers.json +10 -0
- model.safetensors +3 -0
- modules.json +20 -0
- sentence_bert_config.json +4 -0
- special_tokens_map.json +37 -0
- tokenizer.json +0 -0
- tokenizer_config.json +945 -0
1_Pooling/config.json
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"word_embedding_dimension": 768,
|
3 |
+
"pooling_mode_cls_token": false,
|
4 |
+
"pooling_mode_mean_tokens": true,
|
5 |
+
"pooling_mode_max_tokens": false,
|
6 |
+
"pooling_mode_mean_sqrt_len_tokens": false,
|
7 |
+
"pooling_mode_weightedmean_tokens": false,
|
8 |
+
"pooling_mode_lasttoken": false,
|
9 |
+
"include_prompt": true
|
10 |
+
}
|
README.md
ADDED
@@ -0,0 +1,929 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
language:
|
3 |
+
- en
|
4 |
+
tags:
|
5 |
+
- sentence-transformers
|
6 |
+
- sentence-similarity
|
7 |
+
- feature-extraction
|
8 |
+
- generated_from_trainer
|
9 |
+
- dataset_size:104601
|
10 |
+
- loss:MultipleNegativesRankingLoss
|
11 |
+
base_model: nomic-ai/modernbert-embed-base
|
12 |
+
widget:
|
13 |
+
- source_sentence: How can the company assess the financial viability of its gaming
|
14 |
+
division in relation to its overall business strategy?
|
15 |
+
sentences:
|
16 |
+
- In the event of a partial or total liquidation of the Partnership or in the event
|
17 |
+
there were insufficient Partnership assets to satisfy the claims of its general
|
18 |
+
creditors , the limited partners may not be entitled to receive their entire Capital
|
19 |
+
Contribut ion amounts back. Limited partner capital ac counts are not guaranteed.
|
20 |
+
However, as a class, the limit ed partners would be entitled to receive the return
|
21 |
+
of their aggregate Capital Contri butions before the return of any capital contributions
|
22 |
+
to the subordinated limited partners or the general partners. If the Partnership
|
23 |
+
experiences losses in any year but liquidation procedures described above are
|
24 |
+
not undertaken and the Partne rship continues, the amounts of such losses would
|
25 |
+
be absorbed in the capital accounts of the partners as described in the Partnership
|
26 |
+
Agreement, and each limited partner in any event remains entitled to receive the
|
27 |
+
7½% Payments under t he terms of the Partnership Agreement. However, as there
|
28 |
+
would be no accumulated profits in such a year, limited partner s would not receive
|
29 |
+
any sums representing participation in net income of the Partnership. In addition,
|
30 |
+
although the amount of the 7½% Payments to limited partners are charged as an
|
31 |
+
expense to the Partnership and are pay able whether or not the Partnership ear
|
32 |
+
ns any accumulated profits during any given period, no reserve fund has been set
|
33 |
+
aside to enable the Partnership to make such payments. Therefore, such payments
|
34 |
+
to the limited partners are subject to the Partnership’s ability to service the
|
35 |
+
7½% Payment, of which there is no assurance.
|
36 |
+
- 10. Compliance of Award Agreement and Plan with Section 409A . The provisions
|
37 |
+
of this Paragraph 10 apply to you only if you are a U.S. taxpayer. (a) This Award
|
38 |
+
Agreement and the Plan provisions that apply to this Award are intended and will
|
39 |
+
be construed to comply with Section 409A (including the requirements applicable
|
40 |
+
to, or the conditions for exemption from treatment as, 409A Deferred Compensation),
|
41 |
+
whether by reason of short-term deferral treatment or other exceptions or provisions.
|
42 |
+
The Committee will have full authority to give effect to this intent. To the extent
|
43 |
+
necessary to give effect to this intent, in the case of any conflict or potential
|
44 |
+
inconsistency between the provisions of the Plan (including Sections 1.3.2 and
|
45 |
+
2.1 thereof) and this Award Agreement, the provisions of this Award Agreement
|
46 |
+
will govern, and in the case of any conflict or potential inconsistency between
|
47 |
+
this Paragraph 10 and the other provisions of this Award Agreement, this Paragraph
|
48 |
+
10 will govern. (b) Delivery of RSU Shares will not be delayed beyond the date
|
49 |
+
on which all applicable conditions or restrictions on delivery of RSU Shares required
|
50 |
+
by this Agreement (including those specified in Paragraphs 4, 6(b) and 7 and the
|
51 |
+
consents and other items specified in Section 3.3 of the Plan) are satisfied,
|
52 |
+
and will occur by December 31 of the calendar year in which the Delivery Date
|
53 |
+
occurs unless, in order to permit such conditions or restrictions to be satisfied,
|
54 |
+
the Committee elects, pursuant to Reg. 1.409A-1(b)(4)(i)(D) or otherwise as may
|
55 |
+
be permitted in accordance with Section 409A, to delay delivery of RSU Shares
|
56 |
+
to a later date as may be permitted under Section 409A, including Reg. 1.409A-3(d).
|
57 |
+
For the avoidance of doubt, if the Award includes a “series of installment payments”
|
58 |
+
as described in Reg. 1.409A-2(b)(2)(iii), your right to the series of installment
|
59 |
+
payments will be treated as a right to a series of separate payments and not as
|
60 |
+
a right to a single payment. (c) Notwithstanding the provisions of Paragraph 7(b)
|
61 |
+
and Section 1.3.2(i) of the Plan, to the extent necessary to comply with Section
|
62 |
+
409A, any securities, other Awards or other property that the Firm may deliver
|
63 |
+
in respect of your RSUs will not have the effect of deferring delivery or payment,
|
64 |
+
income inclusion, or a substantial risk of forfeiture, beyond the date on which
|
65 |
+
such delivery, payment or inclusion would occur or such risk of forfeiture would
|
66 |
+
lapse, with respect to the RSU Shares that would otherwise have been deliverable
|
67 |
+
(unless the Committee elects a later date for this purpose pursuant to Reg. 1.409A-1(b)(4)(i)(D)
|
68 |
+
or otherwise as may be permitted under Section 409A, including and to the extent
|
69 |
+
applicable, the subsequent election provisions of Section 409A(a)(4)(C) of the
|
70 |
+
Code and Reg. 1.409A-2(b)). (d) Notwithstanding the timing provisions of Paragraph
|
71 |
+
6(b), the delivery of RSU Shares referred to therein will be made after the date
|
72 |
+
of death and during the calendar year that includes the date of death (or on such
|
73 |
+
later date as may be permitted under Section 409A). (e) Notwithstanding any provision
|
74 |
+
of Paragraph 5 or Section 2.8.2 of the Plan to the contrary, the Dividend Equivalent
|
75 |
+
Rights with respect to each of your Outstanding RSUs will be paid to you within
|
76 |
+
the calendar year that includes the date of distribution of any corresponding
|
77 |
+
regular cash dividends paid by GS Inc. in respect of a share of Common Stock the
|
78 |
+
record date for which occurs on or after the Date of Grant. The payment will be
|
79 |
+
in an amount (less applicable withholding) equal to such regular dividend payment
|
80 |
+
as would have been made in respect of the RSU Shares underlying such Outstanding
|
81 |
+
RSUs. (f) The timing of delivery or payment referred to in Paragraph 6(a)(i) will
|
82 |
+
be the earlier of (i) the Delivery Date or (ii) within the calendar year in which
|
83 |
+
the Committee receives satisfactory documentation relating to your Conflicted
|
84 |
+
Employment, provided that such delivery or payment will be made, and any Committee
|
85 |
+
action referred to in Paragraph 6(a)(i) will be taken, only at such time as, and
|
86 |
+
if and to the extent that it, as reasonably determined by the Firm, would not
|
87 |
+
result in the imposition of any additional tax to you under Section 409A.
|
88 |
+
- PART I Item 1 15 OPERATIONS We have regional operations service centers that support
|
89 |
+
our operations, including customer contract and order processing, billing, credit
|
90 |
+
and collections, information processing, and vendor management and logistics.
|
91 |
+
The center in Ireland supports the African, Asia -Pacific, European, and Middle
|
92 |
+
East regions ; and the centers in Arlington, Virginia, Atlanta, Georgia , Charlotte,
|
93 |
+
North Carolina, Fargo, North Dakota, Fort Lauderdale, Florida, Redmond, Washington,
|
94 |
+
Reno, Nevada , and Puerto Rico support the America n region s. In addition to
|
95 |
+
our operations centers, we also operate datacenters throughout each of these regions
|
96 |
+
. We continue to identify and evaluate opportunities to expand our datacenter
|
97 |
+
locations and increase our server capacity to me et the evolving needs of our
|
98 |
+
customers, particularly given the growing demand for AI services . Our datacenters
|
99 |
+
depend on the availability of permitted and buildable land, predictable energy,
|
100 |
+
networking supplies, and servers, including graphics processing units (“ GPUs
|
101 |
+
”) and other components. Our devices are primarily manufactured by third -party
|
102 |
+
contract manufacturers. For the majority of our products, we have the ability
|
103 |
+
to use other manufacturers if a current vendor becomes unavailable or unable to
|
104 |
+
meet our requirements. However, some of our products contain certain components
|
105 |
+
for which there are very few qualified suppliers. Extended disruptions at these
|
106 |
+
suppliers could impact our ability to manufacture devices on time to meet consumer
|
107 |
+
demand. RESEARCH AND DEVELOPMENT Product and Service Development, and Intellectual
|
108 |
+
Property We develop most of our products and services internally through the following
|
109 |
+
engineering groups. • Cloud and AI – focuses on making IT professionals, developers,
|
110 |
+
partners, independent software vendors, and their systems more productive and
|
111 |
+
efficient through development of Azure AI platform and cloud infrastructure, server,
|
112 |
+
database, CRM, ERP, software development tools and services (including GitHub),
|
113 |
+
AI cognitive services, and other business process applications and services for
|
114 |
+
enterprises. • Strategic Missions and Technologies – focuses on incubating technical
|
115 |
+
products and support solutions with transformative potential for the future of
|
116 |
+
cloud computing and continued company growth across quantum computing, Azure Space
|
117 |
+
& Missions Engineering, telecommunications, and Microsoft F ederal Sales and Delivery.
|
118 |
+
• Experiences and Devices – focuses on delivering high value end -user experiences
|
119 |
+
across our products, services, and devices, including Microsoft 365, Windows,
|
120 |
+
Microsoft Teams, Search (including Microsoft Edge and Bing Chat) and other advertising
|
121 |
+
-based services, and the Surface line of devices. • Microsoft Security – focuses
|
122 |
+
on delivering a comprehensive portfolio of services that protect our customers’
|
123 |
+
digital infrastructure through cloud platform and application security, data protection
|
124 |
+
and governance, identity and network access, and device management . • Technology
|
125 |
+
and Research – focuses on fundamental research, product and business incubations
|
126 |
+
, and forward -looking AI innovations that span infrastructure, services, and
|
127 |
+
applications. • LinkedIn – focuses on our services that transform the way professionals
|
128 |
+
grow their network and find jobs and the way businesses hire, market, sell, and
|
129 |
+
learn. • Gaming – focuses on developing hardware, content, and services across
|
130 |
+
a large range of platforms to help grow our user base through game experiences
|
131 |
+
and social interaction. Internal development allows us to maintain competitive
|
132 |
+
advantages that come from product differentiation and closer technical control
|
133 |
+
over our products and services. It also gives us the freedom to decide which modifications
|
134 |
+
and enhancements are most impor tant and when they should be implemented. We strive
|
135 |
+
to obtain information as early as possible about changing usage patterns and hardware
|
136 |
+
advances that may affect software and hardware design. Before releasing new software
|
137 |
+
platforms, and as we make signifi cant modifications to existing platforms, we
|
138 |
+
provide application vendors with a range of resources and guidelines for development,
|
139 |
+
training, and testing. Generally, we also create product documentation internally.
|
140 |
+
We protect our intellectual property investments in a variety of ways. We work
|
141 |
+
actively in the U.S. and internationally to ensure the enforcement of copyright,
|
142 |
+
trademark, trade secret, and other protections that apply to our software and
|
143 |
+
hardware products, services, business plans, and branding. We are a leader among
|
144 |
+
technology companies in pursuing patents and currently have a portfolio of over
|
145 |
+
70,000 U.S. and international patents issued and over 19,000 pending
|
146 |
+
- source_sentence: Why is it essential for financial institutions to regularly recalibrate
|
147 |
+
their model parameters?
|
148 |
+
sentences:
|
149 |
+
- Balances at the beginning of the year reflect the segment structure as of December
|
150 |
+
31, 2023. 1 Non-core and Legacy (including Investment Bank). 2 Adjustments represent
|
151 |
+
certain consolidating entries, including those relating to entities that are managed
|
152 |
+
but are not owned or wholly owned by Credit Suisse. 3 Represents changes in portfolio
|
153 |
+
size. 4 Represents movements arising from internally driven updates to models
|
154 |
+
and recalibrations of model parameters specific only to Credit Suisse. 5 Represents
|
155 |
+
movements arising from externally driven updates to models and recalibrations
|
156 |
+
of model parameters specific only to Credit Suisse.
|
157 |
+
- Resolving potential conflicts necessarily depends on the facts and circumstances
|
158 |
+
of a particular situation and the application of experienced and informed judgment.
|
159 |
+
As a general matter, Conflicts Resolution reviews financing and advisory assignments
|
160 |
+
in Global Banking & Markets and certain of our investing, lending and other activities.
|
161 |
+
In addition, we have various transaction oversight committees, such as the Firmwide
|
162 |
+
Capital, Commitments and Suitability Committees and other committees that also
|
163 |
+
review new underwritings, loans, investments and structured products. These groups
|
164 |
+
and committees work with internal and external counsel and Compliance to evaluate
|
165 |
+
and address any actual or potential conflicts. The head of Conflicts Resolution
|
166 |
+
reports to our chief legal officer, who reports to our chief executive officer.
|
167 |
+
We regularly assess our policies and procedures that address conflicts of interest
|
168 |
+
in an effort to conduct our business in accordance with the highest ethical standards
|
169 |
+
and in compliance with all applicable laws, rules and regulations. For further
|
170 |
+
information about our risk management processes, see “Overview and Structure of
|
171 |
+
Risk Management” and “Risk Factors” in Part I, Item 1A of this Form 10-K.THE GOLDMAN
|
172 |
+
SACHS GROUP, INC. AND SUBSIDIARIES Management’s Discussion and Analysis Goldman
|
173 |
+
Sachs 2023 Form 10-K 123
|
174 |
+
- 'PART IV 85 ITEM 15. EXHIBITS AND FINANCIAL STATEMENT SCHEDULES INDEX Page No.
|
175 |
+
(a)(1) The followin g financial statements are included in Part II, Item 8: Mana
|
176 |
+
gement’s Report on Internal Control over Financial Reportin g 45 Report of Independent
|
177 |
+
Re gistered Public Accountin g Firm 46 Consolidated Statements of Financial Condition
|
178 |
+
as of December 31, 2023 and 2022 48 Consolidated Statements of Income for the
|
179 |
+
years ended December 31, 2023, 2022 and 2021 49 Consolidated Statements of Changes
|
180 |
+
in Partnership Capital Subject to Mandatory Redemption for the years ended December
|
181 |
+
31, 2023, 2022 and 2021 50 Consolidated Statements of Cash Flows for the years
|
182 |
+
ended December 31, 2023, 2022 and 2021 51 Notes to Consolidated Financial Statements
|
183 |
+
52 (2) The followin g financial statements are included in Schedule I: Parent
|
184 |
+
Compan y Only Condensed Statements of Financial Condition as of December 31, 2023
|
185 |
+
and 2022 90 Parent Company Only Condensed Statements of Income for the years ended
|
186 |
+
December 31, 2023, 2022 and 2021 91 Parent Company Only Condensed Statements of
|
187 |
+
Cash Flows for the years ended December 31, 2023, 2022 and 2021 92 Other schedules
|
188 |
+
are omitted because they are not required, inapplicable, or the information is
|
189 |
+
otherwise shown in the Consolidated Financial Statements or notes thereto. (b)
|
190 |
+
Exhibits Reference is made to the Exhibit Index hereinafter contained.'
|
191 |
+
- source_sentence: '**Comparative Analysis**: Compare the share volumes of GlaxoSmithKline
|
192 |
+
Pharmaceuticals Ltd. and Hindustan Zinc Ltd. and discuss what this might suggest
|
193 |
+
about investor interest in these companies.'
|
194 |
+
sentences:
|
195 |
+
- 'Institutional active AUM ended 2023 at $1.9 trillion, reflecting $87 billion
|
196 |
+
of net inflows, driven by the funding of several significant outsourcing mandates
|
197 |
+
and continued growth in our LifePath® target-date and private markets platforms.
|
198 |
+
Multi-asset net inflows of $86 billion reflected continued growth from significant
|
199 |
+
pension outsourcing mandates and LifePath target-date offerings. Fixed income
|
200 |
+
net inflows of $5 billion similarly reflected the funding of insurance outsourcing
|
201 |
+
mandates. Equity net outflows of $13 billion were primarily from quantitative
|
202 |
+
equity strategies. Alternatives net inflows of $10 billion were led by infrastructure,
|
203 |
+
private credit and private equity. Excluding return of capital and investment
|
204 |
+
of $7 billion, alternatives net inflows were $17 billion. At year-end, BlackRock
|
205 |
+
had approximately $32 billion of non-fee paying, unfunded, uninvested commitments
|
206 |
+
to deploy for institutional clients, which is not included in AUM. Institutional
|
207 |
+
active represented 21% of long-term AUM and 19% of long-term base fees and securities
|
208 |
+
lending revenue for 2023. Institutional index AUM totaled $2.9 trillion at December
|
209 |
+
31, 2023, reflecting $55 billion of net outflows, driven by equities. Institutional
|
210 |
+
index represented 31% of long-term AUM and 7% of long-term base fees and securities
|
211 |
+
lending revenue for 2023. The Company’s institutional clients consist of the following:
|
212 |
+
•Pensions, Foundations and Endowments BlackRock is among the world’s largest managers
|
213 |
+
of pension plan assets with $3.0 trillion, or 63%, of long-term institutional
|
214 |
+
AUM managed for defined benefit, defined contribution and other pension plans
|
215 |
+
for corporations, governments and unions at December 31, 2023. The market landscape
|
216 |
+
continues to shift from defined benefit to defined contribution, and our defined
|
217 |
+
contribution channel represented $1.5 trillion of total pension AUM. BlackRock
|
218 |
+
remains well positioned for the on-going evolution of the defined contribution
|
219 |
+
market and demand for outcome-oriented investments. An additional $83 billion,
|
220 |
+
or 2%, of long-term institutional AUM was managed for other tax-exempt investors,
|
221 |
+
including charities, foundations and endowments. •Official Institutions BlackRock
|
222 |
+
managed $272 billion, or 6%, of long-term institutional AUM for official institutions,
|
223 |
+
including central banks, sovereign wealth funds, supranationals, multilateral
|
224 |
+
entities and government ministries and agencies at year-end 2023. These clients
|
225 |
+
often require specialized investment advice, the use of customized benchmarks
|
226 |
+
and training support. •Financial and Other Institutions BlackRock is a top independent
|
227 |
+
manager of assets for insurance companies, which accounted for $650 billion, or
|
228 |
+
13%, of long-term institutional AUM at year-end 2023. Assets managed for other
|
229 |
+
taxable institutions, including corporations, banks and third-party fund sponsors
|
230 |
+
for which the Company provides sub-advisory services, totaled $773 billion, or
|
231 |
+
16%, of long-term institutional AUM at year-end. 5'
|
232 |
+
- 'Fair Value Option At December 31, 2023 and 2022, the Company elected the fair
|
233 |
+
value option for certain investments in CLOs of approximately $42 million and
|
234 |
+
$52 million, respectively, reported within investments. In addition, the Company
|
235 |
+
elected the fair value option for bank loans and borrowings of a consolidated
|
236 |
+
CLO, recorded within investments and other liabilities, respectively. The following
|
237 |
+
table summarizes the information related to these bank loans and borrowings at
|
238 |
+
December 31, 2023 and 2022: December 31, December 31, (in millions) 2023 2022
|
239 |
+
CLO Bank loans: Aggregate principal amounts outstanding $ 203 $ 238 Fair value
|
240 |
+
194 234 Aggregate unpaid principal balance in excess of (less than) fair value
|
241 |
+
$ 9 $ 4 CLO Borrowings: Aggregate principal amounts outstanding $ 190 $ 245 Fair
|
242 |
+
value $ 180 $ 245 At December 31, 2023, the principal amounts outstanding of the
|
243 |
+
borrowings issued by the CLOs mature in 2030 and may be repaid prior to maturity
|
244 |
+
at any time. During the year ended December 31, 2023 and 2022, the net gains (losses)
|
245 |
+
from the change in fair value of the bank loans and borrowings held by the consolidated
|
246 |
+
CLO were not material and were recorded in net gain (loss) on the consolidated
|
247 |
+
statements of income. The change in fair value of the assets and liabilities included
|
248 |
+
interest income and expense, respectively. 8. Derivatives and Hedging The Company
|
249 |
+
maintains a program to enter into exchange traded futures as a macro hedging strategy
|
250 |
+
to hedge market price and interest rate exposures with respect to its total portfolio
|
251 |
+
of seed investments in sponsored investment products. At December 31, 2023 and
|
252 |
+
2022, the Company had outstanding exchange traded futures related to this macro
|
253 |
+
hedging strategy with aggregate notional values of approximately $1.8 billion
|
254 |
+
and $1.5 billion, with expiration dates during the first quarter of 2024 and 2023,
|
255 |
+
respectively. In addition, beginning in the first quarter of 2023, the Company
|
256 |
+
entered into futures to economically hedge the exposure to market movements on
|
257 |
+
certain deferred cash compensation plans. At December 31, 2023 , the Company had
|
258 |
+
outstanding exchange traded futures with aggregate notional values related to
|
259 |
+
its deferred cash compensation hedging program of approximately $204 million,
|
260 |
+
with expiration dates during the first quarter of 2024. Changes in the value of
|
261 |
+
the futures contracts are recognized as gains or losses within nonoperating income
|
262 |
+
(expense). Variation margin payments, which represent settlements of profit/loss,
|
263 |
+
are generally received or made daily, and are reflected in other assets and other
|
264 |
+
liabilities on the consolidated statements of financial condition. These amounts
|
265 |
+
were not material as of December 31, 2023 and 2022. The Company executes forward
|
266 |
+
foreign currency exchange contracts to mitigate the risk of certain foreign exchange
|
267 |
+
movements. At December 31, 2023 and 2022, the Company had outstanding forward
|
268 |
+
foreign currency exchange contracts with aggregate notional values of approximately
|
269 |
+
$3.1 billion and $2.2 billion, with expiration dates in January 2024 and January
|
270 |
+
2023, respectively. At both December 31, 2023 and 2022, the Company had a derivative
|
271 |
+
providing credit protection with a notional amount of approximately $17 million
|
272 |
+
to a counterparty, representing the Company’s maximum risk of loss with respect
|
273 |
+
to the derivative. The Company carries the derivative at fair value based on the
|
274 |
+
expected discounted future cash outflows under the arrangement.'
|
275 |
+
- 67 1,845 0.00% Punjab National Bank 2,894 1,822 0.00% Bayer CropScience Ltd. 33
|
276 |
+
1,762 0.00% Central Depository Services India Ltd. 122 1,652 0.00% Balrampur Chini
|
277 |
+
Mills Ltd. 352 1,650 0.00% Kansai Nerolac Paints Ltd. 299 1,642 0.00% Finolex
|
278 |
+
Cables Ltd. 150 1,570 0.00% Jubilant Ingrevia Ltd. 298 1,549 0.00% EID Parry India
|
279 |
+
Ltd. 272 1,533 0.00% Suven Pharmaceuticals Ltd. 252 1,492 0.00% Indian Energy
|
280 |
+
Exchange Ltd. 957 1,483 0.00% PNB Housing Finance Ltd. 205 1,418 0.00% ICICI Securities
|
281 |
+
Ltd. 188 1,394 0.00% GlaxoSmithKline Pharmaceuticals Ltd. 78 1,355 0.00% DCM Shriram
|
282 |
+
Ltd. 121 1,332 0.00% Aditya Birla Capital Ltd. 536 1,283 0.00% Relaxo Footwears
|
283 |
+
Ltd. 114 1,263 0.00% Birlasoft Ltd. 288 1,260 0.00% Intellect Design Arena Ltd.
|
284 |
+
159 1,218 0.00% Amber Enterprises India Ltd. 44 1,210 0.00% Indiabulls Housing
|
285 |
+
Finance Ltd. 735 1,106 0.00% Ajanta Pharma Ltd. 61 1,100 0.00% NCC Ltd. 701 1,043
|
286 |
+
0.00% V-Guard Industries Ltd. 281 968 0.00% Infibeam Avenues Ltd. 4,918 944 0.00%
|
287 |
+
Orient Electric Ltd. 303 884 0.00% Alok Industries Ltd. 4,187 834 0.00% Edelweiss
|
288 |
+
Financial Services Ltd. 1,357 824 0.00% Hindustan Zinc Ltd. 211 790 0.00% Natco
|
289 |
+
Pharma Ltd. 92 777 0.00% Chambal Fertilisers & Chemicals Ltd. 219 728 0.00% General
|
290 |
+
Insurance Corp. of India 311 692 0.00% Firstsource Solutions Ltd. 439 681 0.00%
|
291 |
+
Granules India Ltd. 169 612 0.00% Metropolis Healthcare Ltd. 33 601 0.00% Whirlpool
|
292 |
+
of India Ltd. 31 557 0.00% Nuvama Wealth Management Ltd. 15 518 0.00% Quess Corp.,
|
293 |
+
Ltd. 87 477 0.00% Graphite India Ltd. 95 463 0.00% IndiaMart InterMesh Ltd. 12
|
294 |
+
412 0.00% Strides Pharma Science Ltd. 76 399 0.00%
|
295 |
+
- source_sentence: '**Investment Strategies**: What investment strategies might be
|
296 |
+
appropriate for a portfolio that includes companies like Paycom Software, Inc.
|
297 |
+
and Jabil, Inc.?'
|
298 |
+
sentences:
|
299 |
+
- Nacional del Cobre de Chile $200,000 3.75% 15/1/2031 181,259 0.01% Corp. Nacional
|
300 |
+
del Cobre de Chile $200,000 4.88% 4/11/2044 180,292 0.01% Falabella SA $200,000
|
301 |
+
3.75% 30/10/2027 179,533 0.01% Inversiones CMPC SA $200,000 3.85% 13/1/2030 178,796
|
302 |
+
0.01% Chile Government International Bond $200,000 4.34% 7/3/2042 177,818 0.01%
|
303 |
+
Corp. Nacional del Cobre de Chile $200,000 3.15% 14/1/2030 177,141 0.01% Chile
|
304 |
+
Government International Bond €200,000 0.83% 2/7/2031 171,492 0.00% Engie Energia
|
305 |
+
Chile SA $200,000 3.40% 28/1/2030 166,435 0.00% Empresa de Transporte de Pasajeros
|
306 |
+
Metro SA $200,000 4.70% 7/5/2050 166,104 0.00% Chile Government International
|
307 |
+
Bond $200,000 2.55% 27/7/2033 163,560 0.00% Chile Government International Bond
|
308 |
+
$200,000 4.00% 31/1/2052 163,452 0.00% Corp. Nacional del Cobre de Chile $200,000
|
309 |
+
3.70% 30/1/2050 150,948 0.00% GNL Quintero SA $152,960 4.63% 31/7/2029 148,833
|
310 |
+
0.00% Alfa Desarrollo SpA $199,014 4.55% 27/9/2051 146,111 0.00% Banco Santander
|
311 |
+
Chile $150,000 2.70% 10/1/2025 143,340 0.00% Corp. Nacional del Cobre de Chile
|
312 |
+
$200,000 3.15% 15/1/2051 140,129 0.00% Chile Government International Bond €150,000
|
313 |
+
0.56% 21/1/2029 136,242 0.00% Bonos de la Tesoreria de la Republica en pesos CLP105,000,000
|
314 |
+
5.10% 15/7/2050 133,264 0.00% Chile Government International Bond $200,000 3.10%
|
315 |
+
22/1/2061 130,907 0.00% Bonos de la Tesoreria de la Republica en pesos CLP100,000,000
|
316 |
+
5.80% 1/6/2024 123,512 0.00% Chile Government International Bond $150,000 3.63%
|
317 |
+
30/10/2042 121,567 0.00% Chile Government International Bond €100,000 1.63% 30/1/2025
|
318 |
+
105,315 0.00% Chile Government International Bond €100,000 1.30% 26/7/2036 78,767
|
319 |
+
0.00% Chile Government International Bond €100,000 1.25% 22/1/2051 56,916 0.00%
|
320 |
+
Corp.
|
321 |
+
- LLC $150,000 2.90% 1/3/2030 128,400 0.01% Burlington Northern Santa Fe LLC $150,000
|
322 |
+
4.05% 15/6/2048 128,392 0.01% Leland Stanford Junior University $150,000 3.65%
|
323 |
+
1/5/2048 128,280 0.01% Southern California Edison Co. $175,000 3.60% 1/2/2045
|
324 |
+
128,168 0.01% John Deere Financial, Inc. CAD195,000 1.34% 8/9/2027 127,858 0.01%
|
325 |
+
Kraft Heinz Foods Co. $150,000 4.38% 1/6/2046 127,812 0.01% Masco Corp. $150,000
|
326 |
+
1.50% 15/2/2028 127,754 0.01% Parker-Hannifin Corp. $150,000 4.10% 1/3/2047 127,588
|
327 |
+
0.01% Aircastle Ltd. $150,000 2.85% 26/1/2028 127,563 0.01% UnitedHealth Group,
|
328 |
+
Inc. $140,000 4.25% 15/3/2043 127,393 0.01% Amgen, Inc. $140,000 2.20% 21/2/2027
|
329 |
+
127,198 0.01% Schlumberger Holdings Corp. $135,000 3.90% 17/5/2028 127,179 0.01%
|
330 |
+
Essential Utilities, Inc. $150,000 2.70% 15/4/2030 127,082 0.01% Starbucks Corp.
|
331 |
+
$150,000 2.25% 12/3/2030 126,940 0.01% Charles Schwab Corp. $125,000 5.85% 19/5/2034
|
332 |
+
126,868 0.01% Comcast Corp. $200,000 2.99% 1/11/2063 126,717 0.01% Entergy Texas,
|
333 |
+
Inc. $160,000 1.75% 15/3/2031 126,697 0.01% Exelon Corp. $125,000 5.63% 15/6/2035
|
334 |
+
126,526 0.01% Uniform Mortgage Backed Securities $123,205 6.50% 1/11/2052 126,499
|
335 |
+
0.01% Dell International LLC/EMC Corp. $125,000 5.75% 1/2/2033 126,258 0.01% Government
|
336 |
+
National Mortgage Association $131,748 4.00% 20/3/2049 126,098 0.01% HCA, Inc.
|
337 |
+
$125,000 5.88% 1/2/2029 125,650 0.01% PG&E Wildfire Recovery Funding LLC $132,193
|
338 |
+
3.59% 1/6/2030 125,330 0.01% STERIS Irish FinCo UnLtd Co. $150,000 2.70% 15/3/2031
|
339 |
+
125,236 0.01% Uniform Mortgage Backed Securities $137,046 3.50% 1/6/2050 125,181
|
340 |
+
0.01%
|
341 |
+
- 94 39,148 0.05% Teradyne, Inc. 350 38,965 0.05% Splunk, Inc. 363 38,511 0.05%
|
342 |
+
Zoom Video Communications, Inc. Class A 562 38,149 0.05% Paycom Software, Inc.
|
343 |
+
117 37,585 0.04% Entegris, Inc. 339 37,568 0.04% NetApp, Inc. 480 36,672 0.04%
|
344 |
+
Pinterest, Inc. Class A 1,326 36,253 0.04% PTC, Inc. 243 34,579 0.04% Jabil, Inc.
|
345 |
+
288 31,084 0.04% Akamai Technologies, Inc. 345 31,005 0.04% Lattice Semiconductor
|
346 |
+
Corp. 310 29,782 0.04% SS&C Technologies Holdings, Inc. 487 29,512 0.04% Zscaler,
|
347 |
+
Inc. 198 28,967 0.03% Flex Ltd. 1,027 28,386 0.03% Unity Software, Inc. 648 28,136
|
348 |
+
0.03% EPAM Systems, Inc. 125 28,094 0.03% Manhattan Associates, Inc. 138 27,583
|
349 |
+
0.03% Western Digital Corp. 726 27,537 0.03% Seagate Technology Holdings plc 441
|
350 |
+
27,285 0.03% Amdocs Ltd. 272 26,887 0.03% Check Point Software Technologies Ltd.
|
351 |
+
211 26,506 0.03% GoDaddy, Inc. Class A 347 26,070 0.03% Match Group, Inc. 620
|
352 |
+
25,947 0.03% Super Micro Computer, Inc. 103 25,673 0.03% Open Text Corp. 613 25,525
|
353 |
+
0.03% Dynatrace, Inc. 495 25,478 0.03% Twilio, Inc. Class A 381 24,239 0.03% Gen
|
354 |
+
Digital, Inc. 1,262 23,410 0.03% Qorvo, Inc. 228 23,263 0.03% Okta, Inc. Class
|
355 |
+
A 335 23,232 0.03% Pure Storage, Inc. Class A 630 23,197 0.03% DocuSign, Inc.
|
356 |
+
Class A 452 23,093 0.03% Ceridian HCM Holding, Inc. 332 22,234 0.03% Black Knight,
|
357 |
+
Inc. 345 20,607 0.02% F5, Inc. 138 20,184 0.02% Vertiv Holdings Co. Class A 743
|
358 |
+
18,404 0.02% Arrow Electronics, Inc. 127 18,190 0.02% Toast, Inc. Class A 799
|
359 |
+
18,033 0.02% ZoomInfo Technologies, Inc. Class A 705 17,900 0.02% Globant SA 94
|
360 |
+
16,894 0.02% National Instruments Corp. 288 16,531 0.02% Wolfspeed, Inc. 285 15,843
|
361 |
+
0.02% Dropbox, Inc. Class A 592 15,789 0.02% Rambus, Inc. 246 15,786 0.02% SPS
|
362 |
+
Commerce, Inc. 81 15,557 0.02% Universal Display Corp. 105 15,134 0.02%
|
363 |
+
- source_sentence: How does the diversification of investments across different currencies
|
364 |
+
impact financial risk?
|
365 |
+
sentences:
|
366 |
+
- 20/9/2023 4,504 0.00% GBP 305,720 USD (385,212) JPMorgan Chase Bank 20/9/2023
|
367 |
+
3,544 0.00% EUR 602,840 USD (659,854) State Street Bank & Trust Co. 20/9/2023
|
368 |
+
435 0.00% JPY 67,590,000 USD (473,571) JPMorgan Chase Bank 20/9/2023 (176) (0.00%)
|
369 |
+
GBP 378,925 USD (483,052) State Street Bank & Trust Co. 20/9/2023 (1,208) (0.00%)
|
370 |
+
GBP 382,825 USD (488,055) BNP Paribas 20/9/2023 (1,251) (0.00%) EUR 480,370 USD
|
371 |
+
(528,752) State Street Bank & Trust Co. 20/9/2023 (2,604) (0.00%) JPY 68,925,000
|
372 |
+
USD (489,188) State Street Bank & Trust Co. 20/9/2023 (6,443) (0.00%) JPY 43,800,000
|
373 |
+
USD (319,166) JPMorgan Chase Bank 20/9/2023 (12,395) (0.00%) JPY 91,700,000 USD
|
374 |
+
(657,807) JPMorgan Chase Bank 20/9/2023 (15,547) (0.00%) JPY 639,066,394 USD (4,648,059)
|
375 |
+
JPMorgan Chase Bank 20/9/2023 (172,087) (0.00%) Total OTC Financial Derivative
|
376 |
+
Instruments 545,977 0.00% Total Investments 17,991,067,179 98.73% Fair Value US
|
377 |
+
Dollars ($)% of Total Net Assets Other Assets and Liabilities 232,296,305 1.27%
|
378 |
+
Net Assets 18,223,363,484 100.00%
|
379 |
+
- $20,000 2.60% 1/5/2031 16,394 0.04% Wyeth LLC $15,000 5.95% 1/4/2037 16,387 0.04%
|
380 |
+
Comcast Corp. $20,000 1.95% 15/1/2031 16,352 0.04% Wells Fargo & Co. $20,000 4.40%
|
381 |
+
14/6/2046 16,296 0.04% Home Depot, Inc. $20,000 1.88% 15/9/2031 16,269 0.04% Baxter
|
382 |
+
International, Inc. $20,000 2.54% 1/2/2032 16,201 0.04% NIKE, Inc. $20,000 3.38%
|
383 |
+
27/3/2050 16,199 0.04% Citigroup, Inc. $15,000 6.68% 13/9/2043 16,179 0.04% Bank
|
384 |
+
of America Corp. $20,000 3.95% 23/1/2049 16,170 0.04% JPMorgan Chase & Co. $20,000
|
385 |
+
3.90% 23/1/2049 16,163 0.04% BlackRock, Inc. $20,000 2.10% 25/2/2032 16,138 0.04%
|
386 |
+
Goldman Sachs Group, Inc. $15,000 6.13% 15/2/2033 16,116 0.04% Lowe's Cos, Inc.
|
387 |
+
$20,000 4.45% 1/4/2062 16,110 0.04% UnitedHealth Group, Inc. $20,000 3.70% 15/8/2049
|
388 |
+
16,101 0.04% Lowe's Cos, Inc. $20,000 4.05% 3/5/2047 16,078 0.04% Bristol-Myers
|
389 |
+
Squibb Co. $20,000 1.45% 13/11/2030 16,065 0.04% Amazon.com, Inc. $25,000 2.70%
|
390 |
+
3/6/2060 16,054 0.04% US Bancorp $20,000 2.68% 27/1/2033 16,035 0.04% Bank of
|
391 |
+
America Corp. $15,000 5.88% 7/2/2042 15,990 0.04% Bank of America Corp. $20,000
|
392 |
+
2.30% 21/7/2032 15,988 0.04% General Motors Financial Co., Inc. $20,000 2.70%
|
393 |
+
10/6/2031 15,945 0.04% Comcast Corp. $20,000 1.50% 15/2/2031 15,918 0.04% Newmont
|
394 |
+
Corp. $15,000 6.25% 1/10/2039 15,854 0.04% Paramount Global $15,000 7.88% 30/7/2030
|
395 |
+
15,846 0.04% Gilead Sciences, Inc. $15,000 5.65% 1/12/2041 15,767 0.04% United
|
396 |
+
Parcel Service, Inc. $15,000 5.30% 1/4/2050 15,759 0.04% Comcast Corp. $15,000
|
397 |
+
5.65% 15/6/2035 15,725 0.04% VMware, Inc. $20,000 2.20% 15/8/2031 15,716 0.04%
|
398 |
+
Oracle Corp. $20,000 4.13% 15/5/2045 15,702 0.04% Hewlett Packard Enterprise Co.
|
399 |
+
$15,000 6.35% 15/10/2045 15,617 0.04% Alphabet, Inc.
|
400 |
+
- 'In addition, the restriction on liens in the GSFC 2008 Indenture applies only
|
401 |
+
to liens that secure debt for borrowed money. For example, liens imposed by operation
|
402 |
+
of law, such as liens to secure statutory obligations for taxes or workers’ compensation
|
403 |
+
benefits, or liens the Company creates to secure obligations to pay legal judgments
|
404 |
+
or surety bonds, would not be covered by this restriction. Modification of the
|
405 |
+
Debt Indenture and Waiver of Covenants There are four types of changes GSFC and
|
406 |
+
the Company can make to the GSFC 2008 Indenture and the debt securities or series
|
407 |
+
of debt securities and related guarantees issued under the GSFC 2008 Indenture.
|
408 |
+
Changes Requiring Each Holder’s Approval First, there are changes that cannot
|
409 |
+
be made without the approval of the holder of each debt security affected by the
|
410 |
+
change under the GSFC 2008 Indenture. Here is a list of those types of changes:
|
411 |
+
• change the stated maturity for any principal or interest payment on a debt security;
|
412 |
+
• reduce the principal amount, the amount payable on acceleration of the stated
|
413 |
+
maturity after a default, the interest rate or the redemption price for a debt
|
414 |
+
security; • permit redemption of a debt security if not previously permitted;
|
415 |
+
• impair any right a holder may have to require repayment of its debt security;
|
416 |
+
• change the currency of any payment on a debt security; • change the place of
|
417 |
+
payment on a debt security; • impair a holder’s right to sue for payment of any
|
418 |
+
amount due on its debt security; • reduce the percentage in principal amount of
|
419 |
+
the debt securities of any one or more affected series, taken • separately or
|
420 |
+
together, as applicable, and whether comprising the same or different series or
|
421 |
+
less than all of the debt securities of a series, the approval of whose holders
|
422 |
+
is needed to change the applicable debt indenture or those debt securities; •
|
423 |
+
reduce the percentage in principal amount of the debt securities of any one or
|
424 |
+
more affected series, taken separately or together, as applicable, and whether
|
425 |
+
comprising the same or different series or less than all of the debt securities
|
426 |
+
of a series, the consent of whose holders is needed to waive GSFC’s compliance
|
427 |
+
with the applicable debt indenture or to waive defaults; and • change the provisions
|
428 |
+
of the applicable debt indenture dealing with modification and waiver in any other
|
429 |
+
respect, except to increase any required percentage referred to above or to add
|
430 |
+
to -59-'
|
431 |
+
datasets:
|
432 |
+
- sujet-ai/Sujet-Financial-RAG-EN-Dataset
|
433 |
+
pipeline_tag: sentence-similarity
|
434 |
+
library_name: sentence-transformers
|
435 |
+
metrics:
|
436 |
+
- cosine_accuracy@1
|
437 |
+
- cosine_accuracy@3
|
438 |
+
- cosine_accuracy@5
|
439 |
+
- cosine_accuracy@10
|
440 |
+
- cosine_precision@1
|
441 |
+
- cosine_precision@3
|
442 |
+
- cosine_precision@5
|
443 |
+
- cosine_precision@10
|
444 |
+
- cosine_recall@1
|
445 |
+
- cosine_recall@3
|
446 |
+
- cosine_recall@5
|
447 |
+
- cosine_recall@10
|
448 |
+
- cosine_ndcg@10
|
449 |
+
- cosine_mrr@10
|
450 |
+
- cosine_map@100
|
451 |
+
model-index:
|
452 |
+
- name: SentenceTransformer based on nomic-ai/modernbert-embed-base
|
453 |
+
results:
|
454 |
+
- task:
|
455 |
+
type: information-retrieval
|
456 |
+
name: Information Retrieval
|
457 |
+
dataset:
|
458 |
+
name: ModernFinBERT RAG embed base
|
459 |
+
type: ModernFinBERT-RAG-embed-base
|
460 |
+
metrics:
|
461 |
+
- type: cosine_accuracy@1
|
462 |
+
value: 0.3812677388836329
|
463 |
+
name: Cosine Accuracy@1
|
464 |
+
- type: cosine_accuracy@3
|
465 |
+
value: 0.6329233680227058
|
466 |
+
name: Cosine Accuracy@3
|
467 |
+
- type: cosine_accuracy@5
|
468 |
+
value: 0.7123935666982024
|
469 |
+
name: Cosine Accuracy@5
|
470 |
+
- type: cosine_accuracy@10
|
471 |
+
value: 0.7918637653736992
|
472 |
+
name: Cosine Accuracy@10
|
473 |
+
- type: cosine_precision@1
|
474 |
+
value: 0.3812677388836329
|
475 |
+
name: Cosine Precision@1
|
476 |
+
- type: cosine_precision@3
|
477 |
+
value: 0.2109744560075686
|
478 |
+
name: Cosine Precision@3
|
479 |
+
- type: cosine_precision@5
|
480 |
+
value: 0.1424787133396405
|
481 |
+
name: Cosine Precision@5
|
482 |
+
- type: cosine_precision@10
|
483 |
+
value: 0.07918637653736992
|
484 |
+
name: Cosine Precision@10
|
485 |
+
- type: cosine_recall@1
|
486 |
+
value: 0.3812677388836329
|
487 |
+
name: Cosine Recall@1
|
488 |
+
- type: cosine_recall@3
|
489 |
+
value: 0.6329233680227058
|
490 |
+
name: Cosine Recall@3
|
491 |
+
- type: cosine_recall@5
|
492 |
+
value: 0.7123935666982024
|
493 |
+
name: Cosine Recall@5
|
494 |
+
- type: cosine_recall@10
|
495 |
+
value: 0.7918637653736992
|
496 |
+
name: Cosine Recall@10
|
497 |
+
- type: cosine_ndcg@10
|
498 |
+
value: 0.5891686849331125
|
499 |
+
name: Cosine NDCG@10
|
500 |
+
- type: cosine_mrr@10
|
501 |
+
value: 0.5239367932603505
|
502 |
+
name: Cosine MRR@10
|
503 |
+
- type: cosine_map@100
|
504 |
+
value: 0.5297544273648861
|
505 |
+
name: Cosine MAP@100
|
506 |
+
---
|
507 |
+
|
508 |
+
# SentenceTransformer based on nomic-ai/modernbert-embed-base
|
509 |
+
|
510 |
+
This is a [sentence-transformers](https://www.SBERT.net) model fine-tuned from [nomic-ai/modernbert-embed-base](https://huggingface.co/nomic-ai/modernbert-embed-base) on the [sujet-financial-rag-en-dataset](https://huggingface.co/datasets/sujet-ai/Sujet-Financial-RAG-EN-Dataset) dataset. It maps sentences & paragraphs to a 768-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
|
511 |
+
|
512 |
+
## Model Details
|
513 |
+
|
514 |
+
### Model Description
|
515 |
+
- **Model Type:** Sentence Transformer
|
516 |
+
- **Base model:** [nomic-ai/modernbert-embed-base](https://huggingface.co/nomic-ai/modernbert-embed-base) <!-- at revision 5960f1566fb7cb1adf1eb6e816639cf4646d9b12 -->
|
517 |
+
- **Maximum Sequence Length:** 8192 tokens
|
518 |
+
- **Output Dimensionality:** 768 dimensions
|
519 |
+
- **Similarity Function:** Cosine Similarity
|
520 |
+
- **Training Dataset:**
|
521 |
+
- [sujet-financial-rag-en-dataset](https://huggingface.co/datasets/sujet-ai/Sujet-Financial-RAG-EN-Dataset)
|
522 |
+
- **Language:** en
|
523 |
+
<!-- - **License:** Unknown -->
|
524 |
+
|
525 |
+
### Model Sources
|
526 |
+
|
527 |
+
- **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
|
528 |
+
- **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
|
529 |
+
- **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
|
530 |
+
|
531 |
+
### Full Model Architecture
|
532 |
+
|
533 |
+
```
|
534 |
+
SentenceTransformer(
|
535 |
+
(0): Transformer({'max_seq_length': 8192, 'do_lower_case': False}) with Transformer model: ModernBertModel
|
536 |
+
(1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
|
537 |
+
(2): Normalize()
|
538 |
+
)
|
539 |
+
```
|
540 |
+
|
541 |
+
## Usage
|
542 |
+
|
543 |
+
### Direct Usage (Sentence Transformers)
|
544 |
+
|
545 |
+
First install the Sentence Transformers library:
|
546 |
+
|
547 |
+
```bash
|
548 |
+
pip install -U sentence-transformers
|
549 |
+
```
|
550 |
+
|
551 |
+
Then you can load this model and run inference.
|
552 |
+
```python
|
553 |
+
from sentence_transformers import SentenceTransformer
|
554 |
+
|
555 |
+
# Download from the 🤗 Hub
|
556 |
+
model = SentenceTransformer("sujet-ai/Fin-ModernBERT-RAG-base")
|
557 |
+
# Run inference
|
558 |
+
sentences = [
|
559 |
+
'How does the diversification of investments across different currencies impact financial risk?',
|
560 |
+
'20/9/2023 4,504 0.00% GBP 305,720 USD (385,212) JPMorgan Chase Bank 20/9/2023 3,544 0.00% EUR 602,840 USD (659,854) State Street Bank & Trust Co. 20/9/2023 435 0.00% JPY 67,590,000 USD (473,571) JPMorgan Chase Bank 20/9/2023 (176) (0.00%) GBP 378,925 USD (483,052) State Street Bank & Trust Co. 20/9/2023 (1,208) (0.00%) GBP 382,825 USD (488,055) BNP Paribas 20/9/2023 (1,251) (0.00%) EUR 480,370 USD (528,752) State Street Bank & Trust Co. 20/9/2023 (2,604) (0.00%) JPY 68,925,000 USD (489,188) State Street Bank & Trust Co. 20/9/2023 (6,443) (0.00%) JPY 43,800,000 USD (319,166) JPMorgan Chase Bank 20/9/2023 (12,395) (0.00%) JPY 91,700,000 USD (657,807) JPMorgan Chase Bank 20/9/2023 (15,547) (0.00%) JPY 639,066,394 USD (4,648,059) JPMorgan Chase Bank 20/9/2023 (172,087) (0.00%) Total OTC Financial Derivative Instruments 545,977 0.00% Total Investments 17,991,067,179 98.73% Fair Value US Dollars ($)% of Total Net Assets Other Assets and Liabilities 232,296,305 1.27% Net Assets 18,223,363,484 100.00%',
|
561 |
+
'In addition, the restriction on liens in the GSFC 2008 Indenture applies only to liens that secure debt for borrowed money. For example, liens imposed by operation of law, such as liens to secure statutory obligations for taxes or workers’ compensation benefits, or liens the Company creates to secure obligations to pay legal judgments or surety bonds, would not be covered by this restriction. Modification of the Debt Indenture and Waiver of Covenants There are four types of changes GSFC and the Company can make to the GSFC 2008 Indenture and the debt securities or series of debt securities and related guarantees issued under the GSFC 2008 Indenture. Changes Requiring Each Holder’s Approval First, there are changes that cannot be made without the approval of the holder of each debt security affected by the change under the GSFC 2008 Indenture. Here is a list of those types of changes: • change the stated maturity for any principal or interest payment on a debt security; • reduce the principal amount, the amount payable on acceleration of the stated maturity after a default, the interest rate or the redemption price for a debt security; • permit redemption of a debt security if not previously permitted; • impair any right a holder may have to require repayment of its debt security; • change the currency of any payment on a debt security; • change the place of payment on a debt security; • impair a holder’s right to sue for payment of any amount due on its debt security; • reduce the percentage in principal amount of the debt securities of any one or more affected series, taken • separately or together, as applicable, and whether comprising the same or different series or less than all of the debt securities of a series, the approval of whose holders is needed to change the applicable debt indenture or those debt securities; • reduce the percentage in principal amount of the debt securities of any one or more affected series, taken separately or together, as applicable, and whether comprising the same or different series or less than all of the debt securities of a series, the consent of whose holders is needed to waive GSFC’s compliance with the applicable debt indenture or to waive defaults; and • change the provisions of the applicable debt indenture dealing with modification and waiver in any other respect, except to increase any required percentage referred to above or to add to -59-',
|
562 |
+
]
|
563 |
+
embeddings = model.encode(sentences)
|
564 |
+
print(embeddings.shape)
|
565 |
+
# (3, 768)
|
566 |
+
|
567 |
+
# Get the similarity scores for the embeddings
|
568 |
+
similarities = model.similarity(embeddings, embeddings)
|
569 |
+
print(similarities.shape)
|
570 |
+
# torch.Size([3, 3])
|
571 |
+
```
|
572 |
+
|
573 |
+
<!--
|
574 |
+
### Direct Usage (Transformers)
|
575 |
+
|
576 |
+
<details><summary>Click to see the direct usage in Transformers</summary>
|
577 |
+
|
578 |
+
</details>
|
579 |
+
-->
|
580 |
+
|
581 |
+
<!--
|
582 |
+
### Downstream Usage (Sentence Transformers)
|
583 |
+
|
584 |
+
You can finetune this model on your own dataset.
|
585 |
+
|
586 |
+
<details><summary>Click to expand</summary>
|
587 |
+
|
588 |
+
</details>
|
589 |
+
-->
|
590 |
+
|
591 |
+
<!--
|
592 |
+
### Out-of-Scope Use
|
593 |
+
|
594 |
+
*List how the model may foreseeably be misused and address what users ought not to do with the model.*
|
595 |
+
-->
|
596 |
+
|
597 |
+
## Evaluation
|
598 |
+
|
599 |
+
### Metrics
|
600 |
+
|
601 |
+
#### Information Retrieval
|
602 |
+
|
603 |
+
* Dataset: `ModernFinBERT-RAG-embed-base`
|
604 |
+
* Evaluated with [<code>InformationRetrievalEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator)
|
605 |
+
|
606 |
+
| Metric | Value |
|
607 |
+
|:--------------------|:-----------|
|
608 |
+
| cosine_accuracy@1 | 0.3813 |
|
609 |
+
| cosine_accuracy@3 | 0.6329 |
|
610 |
+
| cosine_accuracy@5 | 0.7124 |
|
611 |
+
| cosine_accuracy@10 | 0.7919 |
|
612 |
+
| cosine_precision@1 | 0.3813 |
|
613 |
+
| cosine_precision@3 | 0.211 |
|
614 |
+
| cosine_precision@5 | 0.1425 |
|
615 |
+
| cosine_precision@10 | 0.0792 |
|
616 |
+
| cosine_recall@1 | 0.3813 |
|
617 |
+
| cosine_recall@3 | 0.6329 |
|
618 |
+
| cosine_recall@5 | 0.7124 |
|
619 |
+
| cosine_recall@10 | 0.7919 |
|
620 |
+
| **cosine_ndcg@10** | **0.5892** |
|
621 |
+
| cosine_mrr@10 | 0.5239 |
|
622 |
+
| cosine_map@100 | 0.5298 |
|
623 |
+
|
624 |
+
<!--
|
625 |
+
## Bias, Risks and Limitations
|
626 |
+
|
627 |
+
*What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
|
628 |
+
-->
|
629 |
+
|
630 |
+
<!--
|
631 |
+
### Recommendations
|
632 |
+
|
633 |
+
*What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
|
634 |
+
-->
|
635 |
+
|
636 |
+
## Training Details
|
637 |
+
|
638 |
+
### Training Dataset
|
639 |
+
|
640 |
+
#### sujet-financial-rag-en-dataset
|
641 |
+
|
642 |
+
* Dataset: [sujet-financial-rag-en-dataset](https://huggingface.co/datasets/sujet-ai/Sujet-Financial-RAG-EN-Dataset) at [ec52315](https://huggingface.co/datasets/sujet-ai/Sujet-Financial-RAG-EN-Dataset/tree/ec523152632ce80949025488e17115020e8fe8c4)
|
643 |
+
* Size: 104,601 training samples
|
644 |
+
* Columns: <code>anchor</code> and <code>positive</code>
|
645 |
+
* Approximate statistics based on the first 1000 samples:
|
646 |
+
| | anchor | positive |
|
647 |
+
|:--------|:-----------------------------------------------------------------------------------|:--------------------------------------------------------------------------------------|
|
648 |
+
| type | string | string |
|
649 |
+
| details | <ul><li>min: 13 tokens</li><li>mean: 24.56 tokens</li><li>max: 50 tokens</li></ul> | <ul><li>min: 23 tokens</li><li>mean: 647.39 tokens</li><li>max: 1165 tokens</li></ul> |
|
650 |
+
* Samples:
|
651 |
+
| anchor | positive |
|
652 |
+
|:-------------------------------------------------------------------------------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
653 |
+
| <code>How does the Compensation Committee's role influence the stock awards granted to executive officers?</code> | <code>PART II Item 8 88 Stock Plans Stock awards entitle the holder to receive shares of Microsoft common stock as the award vests. Stock awards generally vest over a service period of four years or five years. Executive Incentive Plan Under the Executive Incentive Plan, the Compensation Committee approves stock awards to executive officers and certain senior executives. RSUs generally vest ratably over a service period of four years. PSUs generally vest over a performance period of thre e years. The number of shares the PSU holder receives is based on the extent to which the corresponding performance goals have been achieved. Activity for All Stock Plans The fair value of stock awards was estimated on the date of grant using the following assumptions: Year ended June 30, 2023 2022 2021 Dividends per share (quarterly amounts) $ 0.62 – 0.68 $ 0.56 – 0.62 $ 0.51 – 0.56 Interest rates 2.0% – 5.4% 0.03% – 3.6% 0.01% – 1.5% During fiscal year 2023 , the following activity occurred under our stock...</code> |
|
654 |
+
| <code>What is the fair value of the bond issued by CVS Health Corp., and how does it compare to the fair value of the bond issued by Walt Disney Co.?</code> | <code>445 Vanguard ESG Global Corporate Bond UCITS ETF Principal CouponMaturity DateFair Value US Dollars ($)% of Total Net Assets State Street Corp. $50,000 4.82% 26/1/2034 48,557 0.01% Baxalta, Inc. $50,000 4.00% 23/6/2025 48,515 0.01% Starbucks Corp. $50,000 3.80% 15/8/2025 48,426 0.01% Citigroup, Inc. $50,000 4.60% 9/3/2026 48,387 0.01% Athene Global Funding CAD70,000 2.10% 24/9/2025 48,344 0.01% Bank of America Corp. $50,000 4.25% 22/10/2026 48,257 0.01% PepsiCo, Inc. $50,000 3.60% 18/2/2028 48,191 0.01% Charles Schwab Corp. $50,000 3.85% 21/5/2025 48,183 0.01% JPMorgan Chase & Co. $50,000 4.13% 15/12/2026 48,165 0.01% Charter Communications Operating LLC/Charter Communications Operating Capital $60,000 5.50% 1/4/2063 48,151 0.01% US Bancorp $60,000 2.68% 27/1/2033 48,106 0.01% Chubb INA Holdings, Inc. $50,000 3.35% 3/5/2026 48,074 0.01% Bank of New York Mellon Corp. $50,000 3.00% 24/2/2025 48,071 0.01% Truist Financial Corp. $50,000 4.87% 26/1/2029 48,042 0.01% Truist Financial Corp. $...</code> |
|
655 |
+
| <code>Analyze the impact of currency fluctuations on the unrealized gains and losses reported in the forward currency exchange contracts.</code> | <code>15,216 141,230 0.01% Samsung Fire & Marine Insurance Co., Ltd. - Preference Shares 1,056 137,365 0.01% Samsung SDI Co., Ltd. - Preference Shares 546 133,014 0.01% NHN Corp. 7,096 132,480 0.01% Hanwha Corp. - Preference Shares 10,137 114,475 0.01% Amorepacific Corp. - Preference Shares 4,230 101,123 0.01% CJ CheilJedang Corp. - Preference Shares 576 59,276 0.00% Hanwha Galleria Corp. 47,521 54,711 0.00% - - 386,394,890 29.25% Total Equities 1,291,387,033 97.75% Total Transferable Securities 1,291,387,033 97.75% Number of Contracts Long/ (Short)Notional Amount Unrealised Gain/(Loss) US Dollar s ($)% of Total Net Assets Financial Derivative Instruments Dealt in on a Regulated Market (0.02%) (30 June 2022: (0.00%)) Futures (0.02%) (30 June 2022: (0.00%)) MSCI Pacific Ex-Japan Index September 2023 283 $20,595,251 (131,521) (0.01%) KOSPI 200 Index September 2023 138 KRW11,933,318,478 (141,212) (0.01%) Total Financial Derivative Instruments Dealt in on a Regulated Market (272,733) (0.02%) OTC...</code> |
|
656 |
+
* Loss: [<code>MultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters:
|
657 |
+
```json
|
658 |
+
{
|
659 |
+
"scale": 20.0,
|
660 |
+
"similarity_fct": "cos_sim"
|
661 |
+
}
|
662 |
+
```
|
663 |
+
|
664 |
+
### Evaluation Dataset
|
665 |
+
|
666 |
+
#### sujet-financial-rag-en-dataset
|
667 |
+
|
668 |
+
* Dataset: [sujet-financial-rag-en-dataset](https://huggingface.co/datasets/sujet-ai/Sujet-Financial-RAG-EN-Dataset) at [ec52315](https://huggingface.co/datasets/sujet-ai/Sujet-Financial-RAG-EN-Dataset/tree/ec523152632ce80949025488e17115020e8fe8c4)
|
669 |
+
* Size: 1,057 evaluation samples
|
670 |
+
* Columns: <code>anchor</code> and <code>positive</code>
|
671 |
+
* Approximate statistics based on the first 1000 samples:
|
672 |
+
| | anchor | positive |
|
673 |
+
|:--------|:-----------------------------------------------------------------------------------|:--------------------------------------------------------------------------------------|
|
674 |
+
| type | string | string |
|
675 |
+
| details | <ul><li>min: 13 tokens</li><li>mean: 24.64 tokens</li><li>max: 52 tokens</li></ul> | <ul><li>min: 26 tokens</li><li>mean: 647.51 tokens</li><li>max: 1081 tokens</li></ul> |
|
676 |
+
* Samples:
|
677 |
+
| anchor | positive |
|
678 |
+
|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
679 |
+
| <code>What was the net asset value per share for the EUR Distributing class as of 30 June 2022?</code> | <code>The accompanying notes form an integral part of the financial statements.559 Vanguard EUR Eurozone Government Bond UCITS ETFStatement of Assets and Liabilities EUR (€) EUR (€) As at 30 June As at 30 June Note 2023 2022 Current Assets Financial Assets at Fair Value Through Profit or Loss: Transferable Securities 3,17 1,719,130,585 1,249,469,080 Financial Derivative Instruments 3,17 — 23,742 Cash 3 11,990,422 14,558,520 Receivables: Interest and Dividends 12,715,254 5,193,434 Capital Shares Issued 27 9,190,562 Investments Sold 6,621,764 499,630 Margin Cash Due from Broker 3 3 56,198 Total Current Assets 1,750,458,055 1,278,991,166 Current Liabilities Financial Liabilities at Fair Value Through Profit or Loss: Financial Derivative Instruments 3,17 — 17,321 Bank Overdraft — 6,668 Payables and Other Liabilities: Capital Shares Redeemed 5,790,847 6,811,068 Investments Purchased 8,942,689 15,381,189 Management Fees Payable 12 99,689 69,769 Total Current Liabilities 14,833,225 22,286,015 Net A...</code> |
|
680 |
+
| <code>What factors could lead the Committee to determine that an employee's actions have resulted in a "material adverse impact" on the broader financial system?</code> | <code>Definitions Appendix The following capitalized terms are used in this Award Agreement with the following meanings: (a)“409A Deferred Compensation ” means a “deferral of compensation” or “deferred compensation” as those terms are defined in the regulations under Section 409A. (b)“Conflicted Employment ” means your employment at any U.S. Federal, state or local government, any non-U.S. government, any supranational or international organization, any self- regulatory organization, or any agency or instrumentality of any such government or organization, or any other employer (other than an “Accounting Firm” within the meaning of SEC Rule 2-01(f)(2) of Regulation S-X or any successor thereto) determined by the Committee, if, as a result of such employment, your continued holding of any Outstanding Short-Term RSUs would result in an actual or perceived conflict of interest. (c)“Failed to Consider Risk ” means that you participated (or otherwise oversaw or were responsible for, depending on t...</code> |
|
681 |
+
| <code>What financial implications could arise from a decrease in the pool of qualified drivers for a ridesharing platform?</code> | <code>In addition, changes in certain laws and regulations, including immigration, labor and employment laws or background check requirements, may result in a shift or decrease in the pool of qualified drivers, which may result in increased competition for qualified drivers or higher costs of recruitment, operation and retention. As part of our business operations or research and development efforts, data on the vehicle may be collected and drivers may be uncomfortable or unwilling to drive knowing that data is being collected. Other factors outside of our control, such as concerns about personal health and safety, increases in the price of gasoline, vehicles or insurance, or concerns about the availability of government or other assistance programs if drivers continue to drive on our platform, may also reduce the number of drivers on our platform or their utilization of our platform, or impact our ability to onboard new drivers. If we fail to attract qualified drivers on favorable terms, fa...</code> |
|
682 |
+
* Loss: [<code>MultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters:
|
683 |
+
```json
|
684 |
+
{
|
685 |
+
"scale": 20.0,
|
686 |
+
"similarity_fct": "cos_sim"
|
687 |
+
}
|
688 |
+
```
|
689 |
+
|
690 |
+
### Training Hyperparameters
|
691 |
+
#### Non-Default Hyperparameters
|
692 |
+
|
693 |
+
- `eval_strategy`: steps
|
694 |
+
- `per_device_train_batch_size`: 64
|
695 |
+
- `per_device_eval_batch_size`: 64
|
696 |
+
- `gradient_accumulation_steps`: 8
|
697 |
+
- `learning_rate`: 0.0002
|
698 |
+
- `num_train_epochs`: 2
|
699 |
+
- `lr_scheduler_type`: cosine
|
700 |
+
- `warmup_ratio`: 0.1
|
701 |
+
- `bf16`: True
|
702 |
+
- `tf32`: True
|
703 |
+
- `load_best_model_at_end`: True
|
704 |
+
- `optim`: adamw_torch_fused
|
705 |
+
- `batch_sampler`: no_duplicates
|
706 |
+
|
707 |
+
#### All Hyperparameters
|
708 |
+
<details><summary>Click to expand</summary>
|
709 |
+
|
710 |
+
- `overwrite_output_dir`: False
|
711 |
+
- `do_predict`: False
|
712 |
+
- `eval_strategy`: steps
|
713 |
+
- `prediction_loss_only`: True
|
714 |
+
- `per_device_train_batch_size`: 64
|
715 |
+
- `per_device_eval_batch_size`: 64
|
716 |
+
- `per_gpu_train_batch_size`: None
|
717 |
+
- `per_gpu_eval_batch_size`: None
|
718 |
+
- `gradient_accumulation_steps`: 8
|
719 |
+
- `eval_accumulation_steps`: None
|
720 |
+
- `torch_empty_cache_steps`: None
|
721 |
+
- `learning_rate`: 0.0002
|
722 |
+
- `weight_decay`: 0.0
|
723 |
+
- `adam_beta1`: 0.9
|
724 |
+
- `adam_beta2`: 0.999
|
725 |
+
- `adam_epsilon`: 1e-08
|
726 |
+
- `max_grad_norm`: 1.0
|
727 |
+
- `num_train_epochs`: 2
|
728 |
+
- `max_steps`: -1
|
729 |
+
- `lr_scheduler_type`: cosine
|
730 |
+
- `lr_scheduler_kwargs`: {}
|
731 |
+
- `warmup_ratio`: 0.1
|
732 |
+
- `warmup_steps`: 0
|
733 |
+
- `log_level`: passive
|
734 |
+
- `log_level_replica`: warning
|
735 |
+
- `log_on_each_node`: True
|
736 |
+
- `logging_nan_inf_filter`: True
|
737 |
+
- `save_safetensors`: True
|
738 |
+
- `save_on_each_node`: False
|
739 |
+
- `save_only_model`: False
|
740 |
+
- `restore_callback_states_from_checkpoint`: False
|
741 |
+
- `no_cuda`: False
|
742 |
+
- `use_cpu`: False
|
743 |
+
- `use_mps_device`: False
|
744 |
+
- `seed`: 42
|
745 |
+
- `data_seed`: None
|
746 |
+
- `jit_mode_eval`: False
|
747 |
+
- `use_ipex`: False
|
748 |
+
- `bf16`: True
|
749 |
+
- `fp16`: False
|
750 |
+
- `fp16_opt_level`: O1
|
751 |
+
- `half_precision_backend`: auto
|
752 |
+
- `bf16_full_eval`: False
|
753 |
+
- `fp16_full_eval`: False
|
754 |
+
- `tf32`: True
|
755 |
+
- `local_rank`: 0
|
756 |
+
- `ddp_backend`: None
|
757 |
+
- `tpu_num_cores`: None
|
758 |
+
- `tpu_metrics_debug`: False
|
759 |
+
- `debug`: []
|
760 |
+
- `dataloader_drop_last`: False
|
761 |
+
- `dataloader_num_workers`: 0
|
762 |
+
- `dataloader_prefetch_factor`: None
|
763 |
+
- `past_index`: -1
|
764 |
+
- `disable_tqdm`: False
|
765 |
+
- `remove_unused_columns`: True
|
766 |
+
- `label_names`: None
|
767 |
+
- `load_best_model_at_end`: True
|
768 |
+
- `ignore_data_skip`: False
|
769 |
+
- `fsdp`: []
|
770 |
+
- `fsdp_min_num_params`: 0
|
771 |
+
- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
|
772 |
+
- `fsdp_transformer_layer_cls_to_wrap`: None
|
773 |
+
- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
|
774 |
+
- `deepspeed`: None
|
775 |
+
- `label_smoothing_factor`: 0.0
|
776 |
+
- `optim`: adamw_torch_fused
|
777 |
+
- `optim_args`: None
|
778 |
+
- `adafactor`: False
|
779 |
+
- `group_by_length`: False
|
780 |
+
- `length_column_name`: length
|
781 |
+
- `ddp_find_unused_parameters`: None
|
782 |
+
- `ddp_bucket_cap_mb`: None
|
783 |
+
- `ddp_broadcast_buffers`: False
|
784 |
+
- `dataloader_pin_memory`: True
|
785 |
+
- `dataloader_persistent_workers`: False
|
786 |
+
- `skip_memory_metrics`: True
|
787 |
+
- `use_legacy_prediction_loop`: False
|
788 |
+
- `push_to_hub`: False
|
789 |
+
- `resume_from_checkpoint`: None
|
790 |
+
- `hub_model_id`: None
|
791 |
+
- `hub_strategy`: every_save
|
792 |
+
- `hub_private_repo`: None
|
793 |
+
- `hub_always_push`: False
|
794 |
+
- `gradient_checkpointing`: False
|
795 |
+
- `gradient_checkpointing_kwargs`: None
|
796 |
+
- `include_inputs_for_metrics`: False
|
797 |
+
- `include_for_metrics`: []
|
798 |
+
- `eval_do_concat_batches`: True
|
799 |
+
- `fp16_backend`: auto
|
800 |
+
- `push_to_hub_model_id`: None
|
801 |
+
- `push_to_hub_organization`: None
|
802 |
+
- `mp_parameters`:
|
803 |
+
- `auto_find_batch_size`: False
|
804 |
+
- `full_determinism`: False
|
805 |
+
- `torchdynamo`: None
|
806 |
+
- `ray_scope`: last
|
807 |
+
- `ddp_timeout`: 1800
|
808 |
+
- `torch_compile`: False
|
809 |
+
- `torch_compile_backend`: None
|
810 |
+
- `torch_compile_mode`: None
|
811 |
+
- `dispatch_batches`: None
|
812 |
+
- `split_batches`: None
|
813 |
+
- `include_tokens_per_second`: False
|
814 |
+
- `include_num_input_tokens_seen`: False
|
815 |
+
- `neftune_noise_alpha`: None
|
816 |
+
- `optim_target_modules`: None
|
817 |
+
- `batch_eval_metrics`: False
|
818 |
+
- `eval_on_start`: False
|
819 |
+
- `use_liger_kernel`: False
|
820 |
+
- `eval_use_gather_object`: False
|
821 |
+
- `average_tokens_across_devices`: False
|
822 |
+
- `prompts`: None
|
823 |
+
- `batch_sampler`: no_duplicates
|
824 |
+
- `multi_dataset_batch_sampler`: proportional
|
825 |
+
|
826 |
+
</details>
|
827 |
+
|
828 |
+
### Training Logs
|
829 |
+
| Epoch | Step | Training Loss | Validation Loss | ModernFinBERT-RAG-embed-base_cosine_ndcg@10 |
|
830 |
+
|:---------:|:-------:|:-------------:|:---------------:|:-------------------------------------------:|
|
831 |
+
| 0 | 0 | - | - | 0.2812 |
|
832 |
+
| 0.0489 | 10 | 1.8949 | - | - |
|
833 |
+
| 0.0979 | 20 | 1.0738 | - | - |
|
834 |
+
| 0.1468 | 30 | 0.9147 | - | - |
|
835 |
+
| 0.1957 | 40 | 0.8194 | - | - |
|
836 |
+
| 0.2446 | 50 | 0.7847 | - | - |
|
837 |
+
| 0.2936 | 60 | 0.7428 | - | - |
|
838 |
+
| 0.3425 | 70 | 0.7587 | - | - |
|
839 |
+
| 0.3914 | 80 | 0.7769 | - | - |
|
840 |
+
| 0.4404 | 90 | 0.7319 | - | - |
|
841 |
+
| 0.4893 | 100 | 0.7199 | 0.7262 | 0.5395 |
|
842 |
+
| 0.5382 | 110 | 0.7085 | - | - |
|
843 |
+
| 0.5872 | 120 | 0.6726 | - | - |
|
844 |
+
| 0.6361 | 130 | 0.6954 | - | - |
|
845 |
+
| 0.6850 | 140 | 0.65 | - | - |
|
846 |
+
| 0.7339 | 150 | 0.6207 | - | - |
|
847 |
+
| 0.7829 | 160 | 0.6518 | - | - |
|
848 |
+
| 0.8318 | 170 | 0.6227 | - | - |
|
849 |
+
| 0.8807 | 180 | 0.6285 | - | - |
|
850 |
+
| 0.9297 | 190 | 0.6235 | - | - |
|
851 |
+
| 0.9786 | 200 | 0.6183 | 0.6158 | 0.5546 |
|
852 |
+
| 1.0294 | 210 | 0.6036 | - | - |
|
853 |
+
| 1.0783 | 220 | 0.5818 | - | - |
|
854 |
+
| 1.1272 | 230 | 0.5445 | - | - |
|
855 |
+
| 1.1761 | 240 | 0.5115 | - | - |
|
856 |
+
| 1.2251 | 250 | 0.4712 | - | - |
|
857 |
+
| 1.2740 | 260 | 0.449 | - | - |
|
858 |
+
| 1.3229 | 270 | 0.4457 | - | - |
|
859 |
+
| 1.3719 | 280 | 0.4763 | - | - |
|
860 |
+
| 1.4208 | 290 | 0.449 | - | - |
|
861 |
+
| 1.4697 | 300 | 0.4352 | 0.5674 | 0.5797 |
|
862 |
+
| 1.5187 | 310 | 0.4173 | - | - |
|
863 |
+
| 1.5676 | 320 | 0.4198 | - | - |
|
864 |
+
| 1.6165 | 330 | 0.3901 | - | - |
|
865 |
+
| 1.6654 | 340 | 0.4066 | - | - |
|
866 |
+
| 1.7144 | 350 | 0.3802 | - | - |
|
867 |
+
| 1.7633 | 360 | 0.3712 | - | - |
|
868 |
+
| 1.8122 | 370 | 0.3983 | - | - |
|
869 |
+
| 1.8612 | 380 | 0.3886 | - | - |
|
870 |
+
| 1.9101 | 390 | 0.4027 | - | - |
|
871 |
+
| **1.959** | **400** | **0.398** | **0.5435** | **0.5892** |
|
872 |
+
|
873 |
+
* The bold row denotes the saved checkpoint.
|
874 |
+
|
875 |
+
### Framework Versions
|
876 |
+
- Python: 3.10.13
|
877 |
+
- Sentence Transformers: 3.3.1
|
878 |
+
- Transformers: 4.48.0.dev0
|
879 |
+
- PyTorch: 2.5.1+cu124
|
880 |
+
- Accelerate: 1.0.1
|
881 |
+
- Datasets: 3.2.0
|
882 |
+
- Tokenizers: 0.21.0
|
883 |
+
|
884 |
+
## Citation
|
885 |
+
|
886 |
+
### BibTeX
|
887 |
+
|
888 |
+
#### Sentence Transformers
|
889 |
+
```bibtex
|
890 |
+
@inproceedings{reimers-2019-sentence-bert,
|
891 |
+
title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
|
892 |
+
author = "Reimers, Nils and Gurevych, Iryna",
|
893 |
+
booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
|
894 |
+
month = "11",
|
895 |
+
year = "2019",
|
896 |
+
publisher = "Association for Computational Linguistics",
|
897 |
+
url = "https://arxiv.org/abs/1908.10084",
|
898 |
+
}
|
899 |
+
```
|
900 |
+
|
901 |
+
#### MultipleNegativesRankingLoss
|
902 |
+
```bibtex
|
903 |
+
@misc{henderson2017efficient,
|
904 |
+
title={Efficient Natural Language Response Suggestion for Smart Reply},
|
905 |
+
author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil},
|
906 |
+
year={2017},
|
907 |
+
eprint={1705.00652},
|
908 |
+
archivePrefix={arXiv},
|
909 |
+
primaryClass={cs.CL}
|
910 |
+
}
|
911 |
+
```
|
912 |
+
|
913 |
+
<!--
|
914 |
+
## Glossary
|
915 |
+
|
916 |
+
*Clearly define terms in order to be accessible across audiences.*
|
917 |
+
-->
|
918 |
+
|
919 |
+
<!--
|
920 |
+
## Model Card Authors
|
921 |
+
|
922 |
+
*Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
|
923 |
+
-->
|
924 |
+
|
925 |
+
<!--
|
926 |
+
## Model Card Contact
|
927 |
+
|
928 |
+
*Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
|
929 |
+
-->
|
config.json
ADDED
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "nomic-ai/modernbert-embed-base",
|
3 |
+
"architectures": [
|
4 |
+
"ModernBertModel"
|
5 |
+
],
|
6 |
+
"attention_bias": false,
|
7 |
+
"attention_dropout": 0.0,
|
8 |
+
"bos_token_id": 50281,
|
9 |
+
"classifier_activation": "gelu",
|
10 |
+
"classifier_bias": false,
|
11 |
+
"classifier_dropout": 0.0,
|
12 |
+
"classifier_pooling": "mean",
|
13 |
+
"cls_token_id": 50281,
|
14 |
+
"decoder_bias": true,
|
15 |
+
"deterministic_flash_attn": false,
|
16 |
+
"embedding_dropout": 0.0,
|
17 |
+
"eos_token_id": 50282,
|
18 |
+
"global_attn_every_n_layers": 3,
|
19 |
+
"global_rope_theta": 160000.0,
|
20 |
+
"gradient_checkpointing": false,
|
21 |
+
"hidden_activation": "gelu",
|
22 |
+
"hidden_size": 768,
|
23 |
+
"initializer_cutoff_factor": 2.0,
|
24 |
+
"initializer_range": 0.02,
|
25 |
+
"intermediate_size": 1152,
|
26 |
+
"layer_norm_eps": 1e-05,
|
27 |
+
"local_attention": 128,
|
28 |
+
"local_rope_theta": 10000.0,
|
29 |
+
"max_position_embeddings": 8192,
|
30 |
+
"mlp_bias": false,
|
31 |
+
"mlp_dropout": 0.0,
|
32 |
+
"model_type": "modernbert",
|
33 |
+
"norm_bias": false,
|
34 |
+
"norm_eps": 1e-05,
|
35 |
+
"num_attention_heads": 12,
|
36 |
+
"num_hidden_layers": 22,
|
37 |
+
"pad_token_id": 50283,
|
38 |
+
"position_embedding_type": "absolute",
|
39 |
+
"reference_compile": true,
|
40 |
+
"sep_token_id": 50282,
|
41 |
+
"sparse_pred_ignore_index": -100,
|
42 |
+
"sparse_prediction": false,
|
43 |
+
"torch_dtype": "float32",
|
44 |
+
"transformers_version": "4.48.0.dev0",
|
45 |
+
"vocab_size": 50368
|
46 |
+
}
|
config_sentence_transformers.json
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"__version__": {
|
3 |
+
"sentence_transformers": "3.3.1",
|
4 |
+
"transformers": "4.48.0.dev0",
|
5 |
+
"pytorch": "2.5.1+cu124"
|
6 |
+
},
|
7 |
+
"prompts": {},
|
8 |
+
"default_prompt_name": null,
|
9 |
+
"similarity_fn_name": "cosine"
|
10 |
+
}
|
model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:29125d77bee036d9ade01aa18e58ce0202a6134ce48e3804aa3298aee60e1d1c
|
3 |
+
size 596070136
|
modules.json
ADDED
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[
|
2 |
+
{
|
3 |
+
"idx": 0,
|
4 |
+
"name": "0",
|
5 |
+
"path": "",
|
6 |
+
"type": "sentence_transformers.models.Transformer"
|
7 |
+
},
|
8 |
+
{
|
9 |
+
"idx": 1,
|
10 |
+
"name": "1",
|
11 |
+
"path": "1_Pooling",
|
12 |
+
"type": "sentence_transformers.models.Pooling"
|
13 |
+
},
|
14 |
+
{
|
15 |
+
"idx": 2,
|
16 |
+
"name": "2",
|
17 |
+
"path": "2_Normalize",
|
18 |
+
"type": "sentence_transformers.models.Normalize"
|
19 |
+
}
|
20 |
+
]
|
sentence_bert_config.json
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"max_seq_length": 8192,
|
3 |
+
"do_lower_case": false
|
4 |
+
}
|
special_tokens_map.json
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cls_token": {
|
3 |
+
"content": "[CLS]",
|
4 |
+
"lstrip": false,
|
5 |
+
"normalized": false,
|
6 |
+
"rstrip": false,
|
7 |
+
"single_word": false
|
8 |
+
},
|
9 |
+
"mask_token": {
|
10 |
+
"content": "[MASK]",
|
11 |
+
"lstrip": true,
|
12 |
+
"normalized": false,
|
13 |
+
"rstrip": false,
|
14 |
+
"single_word": false
|
15 |
+
},
|
16 |
+
"pad_token": {
|
17 |
+
"content": "[PAD]",
|
18 |
+
"lstrip": false,
|
19 |
+
"normalized": false,
|
20 |
+
"rstrip": false,
|
21 |
+
"single_word": false
|
22 |
+
},
|
23 |
+
"sep_token": {
|
24 |
+
"content": "[SEP]",
|
25 |
+
"lstrip": false,
|
26 |
+
"normalized": false,
|
27 |
+
"rstrip": false,
|
28 |
+
"single_word": false
|
29 |
+
},
|
30 |
+
"unk_token": {
|
31 |
+
"content": "[UNK]",
|
32 |
+
"lstrip": false,
|
33 |
+
"normalized": false,
|
34 |
+
"rstrip": false,
|
35 |
+
"single_word": false
|
36 |
+
}
|
37 |
+
}
|
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
tokenizer_config.json
ADDED
@@ -0,0 +1,945 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"added_tokens_decoder": {
|
3 |
+
"0": {
|
4 |
+
"content": "|||IP_ADDRESS|||",
|
5 |
+
"lstrip": false,
|
6 |
+
"normalized": true,
|
7 |
+
"rstrip": false,
|
8 |
+
"single_word": false,
|
9 |
+
"special": false
|
10 |
+
},
|
11 |
+
"1": {
|
12 |
+
"content": "<|padding|>",
|
13 |
+
"lstrip": false,
|
14 |
+
"normalized": false,
|
15 |
+
"rstrip": false,
|
16 |
+
"single_word": false,
|
17 |
+
"special": true
|
18 |
+
},
|
19 |
+
"50254": {
|
20 |
+
"content": " ",
|
21 |
+
"lstrip": false,
|
22 |
+
"normalized": true,
|
23 |
+
"rstrip": false,
|
24 |
+
"single_word": false,
|
25 |
+
"special": false
|
26 |
+
},
|
27 |
+
"50255": {
|
28 |
+
"content": " ",
|
29 |
+
"lstrip": false,
|
30 |
+
"normalized": true,
|
31 |
+
"rstrip": false,
|
32 |
+
"single_word": false,
|
33 |
+
"special": false
|
34 |
+
},
|
35 |
+
"50256": {
|
36 |
+
"content": " ",
|
37 |
+
"lstrip": false,
|
38 |
+
"normalized": true,
|
39 |
+
"rstrip": false,
|
40 |
+
"single_word": false,
|
41 |
+
"special": false
|
42 |
+
},
|
43 |
+
"50257": {
|
44 |
+
"content": " ",
|
45 |
+
"lstrip": false,
|
46 |
+
"normalized": true,
|
47 |
+
"rstrip": false,
|
48 |
+
"single_word": false,
|
49 |
+
"special": false
|
50 |
+
},
|
51 |
+
"50258": {
|
52 |
+
"content": " ",
|
53 |
+
"lstrip": false,
|
54 |
+
"normalized": true,
|
55 |
+
"rstrip": false,
|
56 |
+
"single_word": false,
|
57 |
+
"special": false
|
58 |
+
},
|
59 |
+
"50259": {
|
60 |
+
"content": " ",
|
61 |
+
"lstrip": false,
|
62 |
+
"normalized": true,
|
63 |
+
"rstrip": false,
|
64 |
+
"single_word": false,
|
65 |
+
"special": false
|
66 |
+
},
|
67 |
+
"50260": {
|
68 |
+
"content": " ",
|
69 |
+
"lstrip": false,
|
70 |
+
"normalized": true,
|
71 |
+
"rstrip": false,
|
72 |
+
"single_word": false,
|
73 |
+
"special": false
|
74 |
+
},
|
75 |
+
"50261": {
|
76 |
+
"content": " ",
|
77 |
+
"lstrip": false,
|
78 |
+
"normalized": true,
|
79 |
+
"rstrip": false,
|
80 |
+
"single_word": false,
|
81 |
+
"special": false
|
82 |
+
},
|
83 |
+
"50262": {
|
84 |
+
"content": " ",
|
85 |
+
"lstrip": false,
|
86 |
+
"normalized": true,
|
87 |
+
"rstrip": false,
|
88 |
+
"single_word": false,
|
89 |
+
"special": false
|
90 |
+
},
|
91 |
+
"50263": {
|
92 |
+
"content": " ",
|
93 |
+
"lstrip": false,
|
94 |
+
"normalized": true,
|
95 |
+
"rstrip": false,
|
96 |
+
"single_word": false,
|
97 |
+
"special": false
|
98 |
+
},
|
99 |
+
"50264": {
|
100 |
+
"content": " ",
|
101 |
+
"lstrip": false,
|
102 |
+
"normalized": true,
|
103 |
+
"rstrip": false,
|
104 |
+
"single_word": false,
|
105 |
+
"special": false
|
106 |
+
},
|
107 |
+
"50265": {
|
108 |
+
"content": " ",
|
109 |
+
"lstrip": false,
|
110 |
+
"normalized": true,
|
111 |
+
"rstrip": false,
|
112 |
+
"single_word": false,
|
113 |
+
"special": false
|
114 |
+
},
|
115 |
+
"50266": {
|
116 |
+
"content": " ",
|
117 |
+
"lstrip": false,
|
118 |
+
"normalized": true,
|
119 |
+
"rstrip": false,
|
120 |
+
"single_word": false,
|
121 |
+
"special": false
|
122 |
+
},
|
123 |
+
"50267": {
|
124 |
+
"content": " ",
|
125 |
+
"lstrip": false,
|
126 |
+
"normalized": true,
|
127 |
+
"rstrip": false,
|
128 |
+
"single_word": false,
|
129 |
+
"special": false
|
130 |
+
},
|
131 |
+
"50268": {
|
132 |
+
"content": " ",
|
133 |
+
"lstrip": false,
|
134 |
+
"normalized": true,
|
135 |
+
"rstrip": false,
|
136 |
+
"single_word": false,
|
137 |
+
"special": false
|
138 |
+
},
|
139 |
+
"50269": {
|
140 |
+
"content": " ",
|
141 |
+
"lstrip": false,
|
142 |
+
"normalized": true,
|
143 |
+
"rstrip": false,
|
144 |
+
"single_word": false,
|
145 |
+
"special": false
|
146 |
+
},
|
147 |
+
"50270": {
|
148 |
+
"content": " ",
|
149 |
+
"lstrip": false,
|
150 |
+
"normalized": true,
|
151 |
+
"rstrip": false,
|
152 |
+
"single_word": false,
|
153 |
+
"special": false
|
154 |
+
},
|
155 |
+
"50271": {
|
156 |
+
"content": " ",
|
157 |
+
"lstrip": false,
|
158 |
+
"normalized": true,
|
159 |
+
"rstrip": false,
|
160 |
+
"single_word": false,
|
161 |
+
"special": false
|
162 |
+
},
|
163 |
+
"50272": {
|
164 |
+
"content": " ",
|
165 |
+
"lstrip": false,
|
166 |
+
"normalized": true,
|
167 |
+
"rstrip": false,
|
168 |
+
"single_word": false,
|
169 |
+
"special": false
|
170 |
+
},
|
171 |
+
"50273": {
|
172 |
+
"content": " ",
|
173 |
+
"lstrip": false,
|
174 |
+
"normalized": true,
|
175 |
+
"rstrip": false,
|
176 |
+
"single_word": false,
|
177 |
+
"special": false
|
178 |
+
},
|
179 |
+
"50274": {
|
180 |
+
"content": " ",
|
181 |
+
"lstrip": false,
|
182 |
+
"normalized": true,
|
183 |
+
"rstrip": false,
|
184 |
+
"single_word": false,
|
185 |
+
"special": false
|
186 |
+
},
|
187 |
+
"50275": {
|
188 |
+
"content": " ",
|
189 |
+
"lstrip": false,
|
190 |
+
"normalized": true,
|
191 |
+
"rstrip": false,
|
192 |
+
"single_word": false,
|
193 |
+
"special": false
|
194 |
+
},
|
195 |
+
"50276": {
|
196 |
+
"content": " ",
|
197 |
+
"lstrip": false,
|
198 |
+
"normalized": true,
|
199 |
+
"rstrip": false,
|
200 |
+
"single_word": false,
|
201 |
+
"special": false
|
202 |
+
},
|
203 |
+
"50277": {
|
204 |
+
"content": "|||EMAIL_ADDRESS|||",
|
205 |
+
"lstrip": false,
|
206 |
+
"normalized": true,
|
207 |
+
"rstrip": false,
|
208 |
+
"single_word": false,
|
209 |
+
"special": false
|
210 |
+
},
|
211 |
+
"50278": {
|
212 |
+
"content": "|||PHONE_NUMBER|||",
|
213 |
+
"lstrip": false,
|
214 |
+
"normalized": true,
|
215 |
+
"rstrip": false,
|
216 |
+
"single_word": false,
|
217 |
+
"special": false
|
218 |
+
},
|
219 |
+
"50279": {
|
220 |
+
"content": "<|endoftext|>",
|
221 |
+
"lstrip": false,
|
222 |
+
"normalized": false,
|
223 |
+
"rstrip": false,
|
224 |
+
"single_word": false,
|
225 |
+
"special": true
|
226 |
+
},
|
227 |
+
"50280": {
|
228 |
+
"content": "[UNK]",
|
229 |
+
"lstrip": false,
|
230 |
+
"normalized": false,
|
231 |
+
"rstrip": false,
|
232 |
+
"single_word": false,
|
233 |
+
"special": true
|
234 |
+
},
|
235 |
+
"50281": {
|
236 |
+
"content": "[CLS]",
|
237 |
+
"lstrip": false,
|
238 |
+
"normalized": false,
|
239 |
+
"rstrip": false,
|
240 |
+
"single_word": false,
|
241 |
+
"special": true
|
242 |
+
},
|
243 |
+
"50282": {
|
244 |
+
"content": "[SEP]",
|
245 |
+
"lstrip": false,
|
246 |
+
"normalized": false,
|
247 |
+
"rstrip": false,
|
248 |
+
"single_word": false,
|
249 |
+
"special": true
|
250 |
+
},
|
251 |
+
"50283": {
|
252 |
+
"content": "[PAD]",
|
253 |
+
"lstrip": false,
|
254 |
+
"normalized": false,
|
255 |
+
"rstrip": false,
|
256 |
+
"single_word": false,
|
257 |
+
"special": true
|
258 |
+
},
|
259 |
+
"50284": {
|
260 |
+
"content": "[MASK]",
|
261 |
+
"lstrip": true,
|
262 |
+
"normalized": false,
|
263 |
+
"rstrip": false,
|
264 |
+
"single_word": false,
|
265 |
+
"special": true
|
266 |
+
},
|
267 |
+
"50285": {
|
268 |
+
"content": "[unused0]",
|
269 |
+
"lstrip": false,
|
270 |
+
"normalized": true,
|
271 |
+
"rstrip": false,
|
272 |
+
"single_word": false,
|
273 |
+
"special": false
|
274 |
+
},
|
275 |
+
"50286": {
|
276 |
+
"content": "[unused1]",
|
277 |
+
"lstrip": false,
|
278 |
+
"normalized": true,
|
279 |
+
"rstrip": false,
|
280 |
+
"single_word": false,
|
281 |
+
"special": false
|
282 |
+
},
|
283 |
+
"50287": {
|
284 |
+
"content": "[unused2]",
|
285 |
+
"lstrip": false,
|
286 |
+
"normalized": true,
|
287 |
+
"rstrip": false,
|
288 |
+
"single_word": false,
|
289 |
+
"special": false
|
290 |
+
},
|
291 |
+
"50288": {
|
292 |
+
"content": "[unused3]",
|
293 |
+
"lstrip": false,
|
294 |
+
"normalized": true,
|
295 |
+
"rstrip": false,
|
296 |
+
"single_word": false,
|
297 |
+
"special": false
|
298 |
+
},
|
299 |
+
"50289": {
|
300 |
+
"content": "[unused4]",
|
301 |
+
"lstrip": false,
|
302 |
+
"normalized": true,
|
303 |
+
"rstrip": false,
|
304 |
+
"single_word": false,
|
305 |
+
"special": false
|
306 |
+
},
|
307 |
+
"50290": {
|
308 |
+
"content": "[unused5]",
|
309 |
+
"lstrip": false,
|
310 |
+
"normalized": true,
|
311 |
+
"rstrip": false,
|
312 |
+
"single_word": false,
|
313 |
+
"special": false
|
314 |
+
},
|
315 |
+
"50291": {
|
316 |
+
"content": "[unused6]",
|
317 |
+
"lstrip": false,
|
318 |
+
"normalized": true,
|
319 |
+
"rstrip": false,
|
320 |
+
"single_word": false,
|
321 |
+
"special": false
|
322 |
+
},
|
323 |
+
"50292": {
|
324 |
+
"content": "[unused7]",
|
325 |
+
"lstrip": false,
|
326 |
+
"normalized": true,
|
327 |
+
"rstrip": false,
|
328 |
+
"single_word": false,
|
329 |
+
"special": false
|
330 |
+
},
|
331 |
+
"50293": {
|
332 |
+
"content": "[unused8]",
|
333 |
+
"lstrip": false,
|
334 |
+
"normalized": true,
|
335 |
+
"rstrip": false,
|
336 |
+
"single_word": false,
|
337 |
+
"special": false
|
338 |
+
},
|
339 |
+
"50294": {
|
340 |
+
"content": "[unused9]",
|
341 |
+
"lstrip": false,
|
342 |
+
"normalized": true,
|
343 |
+
"rstrip": false,
|
344 |
+
"single_word": false,
|
345 |
+
"special": false
|
346 |
+
},
|
347 |
+
"50295": {
|
348 |
+
"content": "[unused10]",
|
349 |
+
"lstrip": false,
|
350 |
+
"normalized": true,
|
351 |
+
"rstrip": false,
|
352 |
+
"single_word": false,
|
353 |
+
"special": false
|
354 |
+
},
|
355 |
+
"50296": {
|
356 |
+
"content": "[unused11]",
|
357 |
+
"lstrip": false,
|
358 |
+
"normalized": true,
|
359 |
+
"rstrip": false,
|
360 |
+
"single_word": false,
|
361 |
+
"special": false
|
362 |
+
},
|
363 |
+
"50297": {
|
364 |
+
"content": "[unused12]",
|
365 |
+
"lstrip": false,
|
366 |
+
"normalized": true,
|
367 |
+
"rstrip": false,
|
368 |
+
"single_word": false,
|
369 |
+
"special": false
|
370 |
+
},
|
371 |
+
"50298": {
|
372 |
+
"content": "[unused13]",
|
373 |
+
"lstrip": false,
|
374 |
+
"normalized": true,
|
375 |
+
"rstrip": false,
|
376 |
+
"single_word": false,
|
377 |
+
"special": false
|
378 |
+
},
|
379 |
+
"50299": {
|
380 |
+
"content": "[unused14]",
|
381 |
+
"lstrip": false,
|
382 |
+
"normalized": true,
|
383 |
+
"rstrip": false,
|
384 |
+
"single_word": false,
|
385 |
+
"special": false
|
386 |
+
},
|
387 |
+
"50300": {
|
388 |
+
"content": "[unused15]",
|
389 |
+
"lstrip": false,
|
390 |
+
"normalized": true,
|
391 |
+
"rstrip": false,
|
392 |
+
"single_word": false,
|
393 |
+
"special": false
|
394 |
+
},
|
395 |
+
"50301": {
|
396 |
+
"content": "[unused16]",
|
397 |
+
"lstrip": false,
|
398 |
+
"normalized": true,
|
399 |
+
"rstrip": false,
|
400 |
+
"single_word": false,
|
401 |
+
"special": false
|
402 |
+
},
|
403 |
+
"50302": {
|
404 |
+
"content": "[unused17]",
|
405 |
+
"lstrip": false,
|
406 |
+
"normalized": true,
|
407 |
+
"rstrip": false,
|
408 |
+
"single_word": false,
|
409 |
+
"special": false
|
410 |
+
},
|
411 |
+
"50303": {
|
412 |
+
"content": "[unused18]",
|
413 |
+
"lstrip": false,
|
414 |
+
"normalized": true,
|
415 |
+
"rstrip": false,
|
416 |
+
"single_word": false,
|
417 |
+
"special": false
|
418 |
+
},
|
419 |
+
"50304": {
|
420 |
+
"content": "[unused19]",
|
421 |
+
"lstrip": false,
|
422 |
+
"normalized": true,
|
423 |
+
"rstrip": false,
|
424 |
+
"single_word": false,
|
425 |
+
"special": false
|
426 |
+
},
|
427 |
+
"50305": {
|
428 |
+
"content": "[unused20]",
|
429 |
+
"lstrip": false,
|
430 |
+
"normalized": true,
|
431 |
+
"rstrip": false,
|
432 |
+
"single_word": false,
|
433 |
+
"special": false
|
434 |
+
},
|
435 |
+
"50306": {
|
436 |
+
"content": "[unused21]",
|
437 |
+
"lstrip": false,
|
438 |
+
"normalized": true,
|
439 |
+
"rstrip": false,
|
440 |
+
"single_word": false,
|
441 |
+
"special": false
|
442 |
+
},
|
443 |
+
"50307": {
|
444 |
+
"content": "[unused22]",
|
445 |
+
"lstrip": false,
|
446 |
+
"normalized": true,
|
447 |
+
"rstrip": false,
|
448 |
+
"single_word": false,
|
449 |
+
"special": false
|
450 |
+
},
|
451 |
+
"50308": {
|
452 |
+
"content": "[unused23]",
|
453 |
+
"lstrip": false,
|
454 |
+
"normalized": true,
|
455 |
+
"rstrip": false,
|
456 |
+
"single_word": false,
|
457 |
+
"special": false
|
458 |
+
},
|
459 |
+
"50309": {
|
460 |
+
"content": "[unused24]",
|
461 |
+
"lstrip": false,
|
462 |
+
"normalized": true,
|
463 |
+
"rstrip": false,
|
464 |
+
"single_word": false,
|
465 |
+
"special": false
|
466 |
+
},
|
467 |
+
"50310": {
|
468 |
+
"content": "[unused25]",
|
469 |
+
"lstrip": false,
|
470 |
+
"normalized": true,
|
471 |
+
"rstrip": false,
|
472 |
+
"single_word": false,
|
473 |
+
"special": false
|
474 |
+
},
|
475 |
+
"50311": {
|
476 |
+
"content": "[unused26]",
|
477 |
+
"lstrip": false,
|
478 |
+
"normalized": true,
|
479 |
+
"rstrip": false,
|
480 |
+
"single_word": false,
|
481 |
+
"special": false
|
482 |
+
},
|
483 |
+
"50312": {
|
484 |
+
"content": "[unused27]",
|
485 |
+
"lstrip": false,
|
486 |
+
"normalized": true,
|
487 |
+
"rstrip": false,
|
488 |
+
"single_word": false,
|
489 |
+
"special": false
|
490 |
+
},
|
491 |
+
"50313": {
|
492 |
+
"content": "[unused28]",
|
493 |
+
"lstrip": false,
|
494 |
+
"normalized": true,
|
495 |
+
"rstrip": false,
|
496 |
+
"single_word": false,
|
497 |
+
"special": false
|
498 |
+
},
|
499 |
+
"50314": {
|
500 |
+
"content": "[unused29]",
|
501 |
+
"lstrip": false,
|
502 |
+
"normalized": true,
|
503 |
+
"rstrip": false,
|
504 |
+
"single_word": false,
|
505 |
+
"special": false
|
506 |
+
},
|
507 |
+
"50315": {
|
508 |
+
"content": "[unused30]",
|
509 |
+
"lstrip": false,
|
510 |
+
"normalized": true,
|
511 |
+
"rstrip": false,
|
512 |
+
"single_word": false,
|
513 |
+
"special": false
|
514 |
+
},
|
515 |
+
"50316": {
|
516 |
+
"content": "[unused31]",
|
517 |
+
"lstrip": false,
|
518 |
+
"normalized": true,
|
519 |
+
"rstrip": false,
|
520 |
+
"single_word": false,
|
521 |
+
"special": false
|
522 |
+
},
|
523 |
+
"50317": {
|
524 |
+
"content": "[unused32]",
|
525 |
+
"lstrip": false,
|
526 |
+
"normalized": true,
|
527 |
+
"rstrip": false,
|
528 |
+
"single_word": false,
|
529 |
+
"special": false
|
530 |
+
},
|
531 |
+
"50318": {
|
532 |
+
"content": "[unused33]",
|
533 |
+
"lstrip": false,
|
534 |
+
"normalized": true,
|
535 |
+
"rstrip": false,
|
536 |
+
"single_word": false,
|
537 |
+
"special": false
|
538 |
+
},
|
539 |
+
"50319": {
|
540 |
+
"content": "[unused34]",
|
541 |
+
"lstrip": false,
|
542 |
+
"normalized": true,
|
543 |
+
"rstrip": false,
|
544 |
+
"single_word": false,
|
545 |
+
"special": false
|
546 |
+
},
|
547 |
+
"50320": {
|
548 |
+
"content": "[unused35]",
|
549 |
+
"lstrip": false,
|
550 |
+
"normalized": true,
|
551 |
+
"rstrip": false,
|
552 |
+
"single_word": false,
|
553 |
+
"special": false
|
554 |
+
},
|
555 |
+
"50321": {
|
556 |
+
"content": "[unused36]",
|
557 |
+
"lstrip": false,
|
558 |
+
"normalized": true,
|
559 |
+
"rstrip": false,
|
560 |
+
"single_word": false,
|
561 |
+
"special": false
|
562 |
+
},
|
563 |
+
"50322": {
|
564 |
+
"content": "[unused37]",
|
565 |
+
"lstrip": false,
|
566 |
+
"normalized": true,
|
567 |
+
"rstrip": false,
|
568 |
+
"single_word": false,
|
569 |
+
"special": false
|
570 |
+
},
|
571 |
+
"50323": {
|
572 |
+
"content": "[unused38]",
|
573 |
+
"lstrip": false,
|
574 |
+
"normalized": true,
|
575 |
+
"rstrip": false,
|
576 |
+
"single_word": false,
|
577 |
+
"special": false
|
578 |
+
},
|
579 |
+
"50324": {
|
580 |
+
"content": "[unused39]",
|
581 |
+
"lstrip": false,
|
582 |
+
"normalized": true,
|
583 |
+
"rstrip": false,
|
584 |
+
"single_word": false,
|
585 |
+
"special": false
|
586 |
+
},
|
587 |
+
"50325": {
|
588 |
+
"content": "[unused40]",
|
589 |
+
"lstrip": false,
|
590 |
+
"normalized": true,
|
591 |
+
"rstrip": false,
|
592 |
+
"single_word": false,
|
593 |
+
"special": false
|
594 |
+
},
|
595 |
+
"50326": {
|
596 |
+
"content": "[unused41]",
|
597 |
+
"lstrip": false,
|
598 |
+
"normalized": true,
|
599 |
+
"rstrip": false,
|
600 |
+
"single_word": false,
|
601 |
+
"special": false
|
602 |
+
},
|
603 |
+
"50327": {
|
604 |
+
"content": "[unused42]",
|
605 |
+
"lstrip": false,
|
606 |
+
"normalized": true,
|
607 |
+
"rstrip": false,
|
608 |
+
"single_word": false,
|
609 |
+
"special": false
|
610 |
+
},
|
611 |
+
"50328": {
|
612 |
+
"content": "[unused43]",
|
613 |
+
"lstrip": false,
|
614 |
+
"normalized": true,
|
615 |
+
"rstrip": false,
|
616 |
+
"single_word": false,
|
617 |
+
"special": false
|
618 |
+
},
|
619 |
+
"50329": {
|
620 |
+
"content": "[unused44]",
|
621 |
+
"lstrip": false,
|
622 |
+
"normalized": true,
|
623 |
+
"rstrip": false,
|
624 |
+
"single_word": false,
|
625 |
+
"special": false
|
626 |
+
},
|
627 |
+
"50330": {
|
628 |
+
"content": "[unused45]",
|
629 |
+
"lstrip": false,
|
630 |
+
"normalized": true,
|
631 |
+
"rstrip": false,
|
632 |
+
"single_word": false,
|
633 |
+
"special": false
|
634 |
+
},
|
635 |
+
"50331": {
|
636 |
+
"content": "[unused46]",
|
637 |
+
"lstrip": false,
|
638 |
+
"normalized": true,
|
639 |
+
"rstrip": false,
|
640 |
+
"single_word": false,
|
641 |
+
"special": false
|
642 |
+
},
|
643 |
+
"50332": {
|
644 |
+
"content": "[unused47]",
|
645 |
+
"lstrip": false,
|
646 |
+
"normalized": true,
|
647 |
+
"rstrip": false,
|
648 |
+
"single_word": false,
|
649 |
+
"special": false
|
650 |
+
},
|
651 |
+
"50333": {
|
652 |
+
"content": "[unused48]",
|
653 |
+
"lstrip": false,
|
654 |
+
"normalized": true,
|
655 |
+
"rstrip": false,
|
656 |
+
"single_word": false,
|
657 |
+
"special": false
|
658 |
+
},
|
659 |
+
"50334": {
|
660 |
+
"content": "[unused49]",
|
661 |
+
"lstrip": false,
|
662 |
+
"normalized": true,
|
663 |
+
"rstrip": false,
|
664 |
+
"single_word": false,
|
665 |
+
"special": false
|
666 |
+
},
|
667 |
+
"50335": {
|
668 |
+
"content": "[unused50]",
|
669 |
+
"lstrip": false,
|
670 |
+
"normalized": true,
|
671 |
+
"rstrip": false,
|
672 |
+
"single_word": false,
|
673 |
+
"special": false
|
674 |
+
},
|
675 |
+
"50336": {
|
676 |
+
"content": "[unused51]",
|
677 |
+
"lstrip": false,
|
678 |
+
"normalized": true,
|
679 |
+
"rstrip": false,
|
680 |
+
"single_word": false,
|
681 |
+
"special": false
|
682 |
+
},
|
683 |
+
"50337": {
|
684 |
+
"content": "[unused52]",
|
685 |
+
"lstrip": false,
|
686 |
+
"normalized": true,
|
687 |
+
"rstrip": false,
|
688 |
+
"single_word": false,
|
689 |
+
"special": false
|
690 |
+
},
|
691 |
+
"50338": {
|
692 |
+
"content": "[unused53]",
|
693 |
+
"lstrip": false,
|
694 |
+
"normalized": true,
|
695 |
+
"rstrip": false,
|
696 |
+
"single_word": false,
|
697 |
+
"special": false
|
698 |
+
},
|
699 |
+
"50339": {
|
700 |
+
"content": "[unused54]",
|
701 |
+
"lstrip": false,
|
702 |
+
"normalized": true,
|
703 |
+
"rstrip": false,
|
704 |
+
"single_word": false,
|
705 |
+
"special": false
|
706 |
+
},
|
707 |
+
"50340": {
|
708 |
+
"content": "[unused55]",
|
709 |
+
"lstrip": false,
|
710 |
+
"normalized": true,
|
711 |
+
"rstrip": false,
|
712 |
+
"single_word": false,
|
713 |
+
"special": false
|
714 |
+
},
|
715 |
+
"50341": {
|
716 |
+
"content": "[unused56]",
|
717 |
+
"lstrip": false,
|
718 |
+
"normalized": true,
|
719 |
+
"rstrip": false,
|
720 |
+
"single_word": false,
|
721 |
+
"special": false
|
722 |
+
},
|
723 |
+
"50342": {
|
724 |
+
"content": "[unused57]",
|
725 |
+
"lstrip": false,
|
726 |
+
"normalized": true,
|
727 |
+
"rstrip": false,
|
728 |
+
"single_word": false,
|
729 |
+
"special": false
|
730 |
+
},
|
731 |
+
"50343": {
|
732 |
+
"content": "[unused58]",
|
733 |
+
"lstrip": false,
|
734 |
+
"normalized": true,
|
735 |
+
"rstrip": false,
|
736 |
+
"single_word": false,
|
737 |
+
"special": false
|
738 |
+
},
|
739 |
+
"50344": {
|
740 |
+
"content": "[unused59]",
|
741 |
+
"lstrip": false,
|
742 |
+
"normalized": true,
|
743 |
+
"rstrip": false,
|
744 |
+
"single_word": false,
|
745 |
+
"special": false
|
746 |
+
},
|
747 |
+
"50345": {
|
748 |
+
"content": "[unused60]",
|
749 |
+
"lstrip": false,
|
750 |
+
"normalized": true,
|
751 |
+
"rstrip": false,
|
752 |
+
"single_word": false,
|
753 |
+
"special": false
|
754 |
+
},
|
755 |
+
"50346": {
|
756 |
+
"content": "[unused61]",
|
757 |
+
"lstrip": false,
|
758 |
+
"normalized": true,
|
759 |
+
"rstrip": false,
|
760 |
+
"single_word": false,
|
761 |
+
"special": false
|
762 |
+
},
|
763 |
+
"50347": {
|
764 |
+
"content": "[unused62]",
|
765 |
+
"lstrip": false,
|
766 |
+
"normalized": true,
|
767 |
+
"rstrip": false,
|
768 |
+
"single_word": false,
|
769 |
+
"special": false
|
770 |
+
},
|
771 |
+
"50348": {
|
772 |
+
"content": "[unused63]",
|
773 |
+
"lstrip": false,
|
774 |
+
"normalized": true,
|
775 |
+
"rstrip": false,
|
776 |
+
"single_word": false,
|
777 |
+
"special": false
|
778 |
+
},
|
779 |
+
"50349": {
|
780 |
+
"content": "[unused64]",
|
781 |
+
"lstrip": false,
|
782 |
+
"normalized": true,
|
783 |
+
"rstrip": false,
|
784 |
+
"single_word": false,
|
785 |
+
"special": false
|
786 |
+
},
|
787 |
+
"50350": {
|
788 |
+
"content": "[unused65]",
|
789 |
+
"lstrip": false,
|
790 |
+
"normalized": true,
|
791 |
+
"rstrip": false,
|
792 |
+
"single_word": false,
|
793 |
+
"special": false
|
794 |
+
},
|
795 |
+
"50351": {
|
796 |
+
"content": "[unused66]",
|
797 |
+
"lstrip": false,
|
798 |
+
"normalized": true,
|
799 |
+
"rstrip": false,
|
800 |
+
"single_word": false,
|
801 |
+
"special": false
|
802 |
+
},
|
803 |
+
"50352": {
|
804 |
+
"content": "[unused67]",
|
805 |
+
"lstrip": false,
|
806 |
+
"normalized": true,
|
807 |
+
"rstrip": false,
|
808 |
+
"single_word": false,
|
809 |
+
"special": false
|
810 |
+
},
|
811 |
+
"50353": {
|
812 |
+
"content": "[unused68]",
|
813 |
+
"lstrip": false,
|
814 |
+
"normalized": true,
|
815 |
+
"rstrip": false,
|
816 |
+
"single_word": false,
|
817 |
+
"special": false
|
818 |
+
},
|
819 |
+
"50354": {
|
820 |
+
"content": "[unused69]",
|
821 |
+
"lstrip": false,
|
822 |
+
"normalized": true,
|
823 |
+
"rstrip": false,
|
824 |
+
"single_word": false,
|
825 |
+
"special": false
|
826 |
+
},
|
827 |
+
"50355": {
|
828 |
+
"content": "[unused70]",
|
829 |
+
"lstrip": false,
|
830 |
+
"normalized": true,
|
831 |
+
"rstrip": false,
|
832 |
+
"single_word": false,
|
833 |
+
"special": false
|
834 |
+
},
|
835 |
+
"50356": {
|
836 |
+
"content": "[unused71]",
|
837 |
+
"lstrip": false,
|
838 |
+
"normalized": true,
|
839 |
+
"rstrip": false,
|
840 |
+
"single_word": false,
|
841 |
+
"special": false
|
842 |
+
},
|
843 |
+
"50357": {
|
844 |
+
"content": "[unused72]",
|
845 |
+
"lstrip": false,
|
846 |
+
"normalized": true,
|
847 |
+
"rstrip": false,
|
848 |
+
"single_word": false,
|
849 |
+
"special": false
|
850 |
+
},
|
851 |
+
"50358": {
|
852 |
+
"content": "[unused73]",
|
853 |
+
"lstrip": false,
|
854 |
+
"normalized": true,
|
855 |
+
"rstrip": false,
|
856 |
+
"single_word": false,
|
857 |
+
"special": false
|
858 |
+
},
|
859 |
+
"50359": {
|
860 |
+
"content": "[unused74]",
|
861 |
+
"lstrip": false,
|
862 |
+
"normalized": true,
|
863 |
+
"rstrip": false,
|
864 |
+
"single_word": false,
|
865 |
+
"special": false
|
866 |
+
},
|
867 |
+
"50360": {
|
868 |
+
"content": "[unused75]",
|
869 |
+
"lstrip": false,
|
870 |
+
"normalized": true,
|
871 |
+
"rstrip": false,
|
872 |
+
"single_word": false,
|
873 |
+
"special": false
|
874 |
+
},
|
875 |
+
"50361": {
|
876 |
+
"content": "[unused76]",
|
877 |
+
"lstrip": false,
|
878 |
+
"normalized": true,
|
879 |
+
"rstrip": false,
|
880 |
+
"single_word": false,
|
881 |
+
"special": false
|
882 |
+
},
|
883 |
+
"50362": {
|
884 |
+
"content": "[unused77]",
|
885 |
+
"lstrip": false,
|
886 |
+
"normalized": true,
|
887 |
+
"rstrip": false,
|
888 |
+
"single_word": false,
|
889 |
+
"special": false
|
890 |
+
},
|
891 |
+
"50363": {
|
892 |
+
"content": "[unused78]",
|
893 |
+
"lstrip": false,
|
894 |
+
"normalized": true,
|
895 |
+
"rstrip": false,
|
896 |
+
"single_word": false,
|
897 |
+
"special": false
|
898 |
+
},
|
899 |
+
"50364": {
|
900 |
+
"content": "[unused79]",
|
901 |
+
"lstrip": false,
|
902 |
+
"normalized": true,
|
903 |
+
"rstrip": false,
|
904 |
+
"single_word": false,
|
905 |
+
"special": false
|
906 |
+
},
|
907 |
+
"50365": {
|
908 |
+
"content": "[unused80]",
|
909 |
+
"lstrip": false,
|
910 |
+
"normalized": true,
|
911 |
+
"rstrip": false,
|
912 |
+
"single_word": false,
|
913 |
+
"special": false
|
914 |
+
},
|
915 |
+
"50366": {
|
916 |
+
"content": "[unused81]",
|
917 |
+
"lstrip": false,
|
918 |
+
"normalized": true,
|
919 |
+
"rstrip": false,
|
920 |
+
"single_word": false,
|
921 |
+
"special": false
|
922 |
+
},
|
923 |
+
"50367": {
|
924 |
+
"content": "[unused82]",
|
925 |
+
"lstrip": false,
|
926 |
+
"normalized": true,
|
927 |
+
"rstrip": false,
|
928 |
+
"single_word": false,
|
929 |
+
"special": false
|
930 |
+
}
|
931 |
+
},
|
932 |
+
"clean_up_tokenization_spaces": true,
|
933 |
+
"cls_token": "[CLS]",
|
934 |
+
"extra_special_tokens": {},
|
935 |
+
"mask_token": "[MASK]",
|
936 |
+
"model_input_names": [
|
937 |
+
"input_ids",
|
938 |
+
"attention_mask"
|
939 |
+
],
|
940 |
+
"model_max_length": 8192,
|
941 |
+
"pad_token": "[PAD]",
|
942 |
+
"sep_token": "[SEP]",
|
943 |
+
"tokenizer_class": "PreTrainedTokenizerFast",
|
944 |
+
"unk_token": "[UNK]"
|
945 |
+
}
|