pytorch_model.bin upload/update
Browse files
- 1_Pooling/config.json +10 -0
- README.md +810 -0
- config.json +31 -0
- config_sentence_transformers.json +10 -0
- model.safetensors +3 -0
- modules.json +20 -0
- sentence_bert_config.json +4 -0
- special_tokens_map.json +37 -0
- tokenizer.json +0 -0
- tokenizer_config.json +57 -0
- vocab.txt +0 -0
1_Pooling/config.json
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"word_embedding_dimension": 384,
|
3 |
+
"pooling_mode_cls_token": true,
|
4 |
+
"pooling_mode_mean_tokens": false,
|
5 |
+
"pooling_mode_max_tokens": false,
|
6 |
+
"pooling_mode_mean_sqrt_len_tokens": false,
|
7 |
+
"pooling_mode_weightedmean_tokens": false,
|
8 |
+
"pooling_mode_lasttoken": false,
|
9 |
+
"include_prompt": true
|
10 |
+
}
|
README.md
ADDED
@@ -0,0 +1,810 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
language: []
|
3 |
+
library_name: sentence-transformers
|
4 |
+
tags:
|
5 |
+
- sentence-transformers
|
6 |
+
- sentence-similarity
|
7 |
+
- feature-extraction
|
8 |
+
- generated_from_trainer
|
9 |
+
- dataset_size:900
|
10 |
+
- loss:GISTEmbedLoss
|
11 |
+
base_model: BAAI/bge-small-en-v1.5
|
12 |
+
datasets: []
|
13 |
+
metrics:
|
14 |
+
- cosine_accuracy@1
|
15 |
+
- cosine_accuracy@5
|
16 |
+
- cosine_accuracy@10
|
17 |
+
- cosine_precision@1
|
18 |
+
- cosine_precision@5
|
19 |
+
- cosine_precision@10
|
20 |
+
- cosine_recall@1
|
21 |
+
- cosine_recall@5
|
22 |
+
- cosine_recall@10
|
23 |
+
- cosine_ndcg@5
|
24 |
+
- cosine_ndcg@10
|
25 |
+
- cosine_ndcg@100
|
26 |
+
- cosine_mrr@5
|
27 |
+
- cosine_mrr@10
|
28 |
+
- cosine_mrr@100
|
29 |
+
- cosine_map@100
|
30 |
+
- dot_accuracy@1
|
31 |
+
- dot_accuracy@5
|
32 |
+
- dot_accuracy@10
|
33 |
+
- dot_precision@1
|
34 |
+
- dot_precision@5
|
35 |
+
- dot_precision@10
|
36 |
+
- dot_recall@1
|
37 |
+
- dot_recall@5
|
38 |
+
- dot_recall@10
|
39 |
+
- dot_ndcg@5
|
40 |
+
- dot_ndcg@10
|
41 |
+
- dot_ndcg@100
|
42 |
+
- dot_mrr@5
|
43 |
+
- dot_mrr@10
|
44 |
+
- dot_mrr@100
|
45 |
+
- dot_map@100
|
46 |
+
widget:
|
47 |
+
- source_sentence: What will be used for the identification of beneficiaries?
|
48 |
+
sentences:
|
49 |
+
- '''for loanee and within 30 days for non loanee i.e. 15th Aug for Kharif and
|
50 |
+
15th Jan for Rabi for loanee and 31st Aug for Kharif and 31st Jan for Rabi
|
51 |
+
for Non Loanee 13 Within 7 days from the date of intimation by ICs CSCs/Banks/
|
52 |
+
Intermediary Cut-off date for CSCs/Banks/Intermediary to correct/update the paid
|
53 |
+
application intimated by ICs on Crop Insurance Portal 14 Cut-off date for Insurer
|
54 |
+
to accept the corrected/updated applications Within 7 days from the date of
|
55 |
+
submission of correction/updation by the Bank/CSC Insurance Companies 15 Within
|
56 |
+
7 days from acceptance of proposal by concerned Insurance Company on Portal Cut-off
|
57 |
+
date for Banks/ICs to hand over insurance acknowledgement receipt along with folio
|
58 |
+
to the insured farmer Banks/ICs for enrolment through their intermediaries 16 Cut
|
59 |
+
off date for processing of applications by ICs and auto approval of application
|
60 |
+
of insured farmers on crop insurance Portal 60 days from the cut off date for
|
61 |
+
enrolment/debit of premium from farmers i.e. 15th September for Kharif and 15th
|
62 |
+
February for Rabi seasons 17 Before cut off date of enrolment of farmers Insurance
|
63 |
+
Companies/GOI /State Cut off date for raising bills/requisitions with supporting
|
64 |
+
documents for releasing of advance premium subsidy based on 50% of 80% of respective
|
65 |
+
share of Centre/State in corresponding previous season 18 Release of advance
|
66 |
+
upfront premium subsidy (First Instalment)i.e. 50% of 80% of respective share
|
67 |
+
of Centre/State in corresponding previous season Within 15days of cut off date
|
68 |
+
of enrolment of farmers i.e. 31st July for Kharif Upto 15th August* 19 *state
|
69 |
+
may fix earlier dates for early Kharif crops Training and registration of field
|
70 |
+
level workers assigned for conduct of CCEs and reporting of the same on crop
|
71 |
+
insurance Portal through smart phones/CCE Agri App Upto31st August* *state 20 Registration
|
72 |
+
of mobile number of representative of ICs for co-witnessing of CCEs may fix earlier
|
73 |
+
dates for early Kharif crops At least 7 days before tentative date for conducting
|
74 |
+
CCEs 21 a) Uploading of tentative schedule/date for conducting CCEs (crop-wise/IU
|
75 |
+
wise) followed by SMS on one day notice through CCEs app.'''
|
76 |
+
- '''8 Banks/PACS/CSC/ insurance agent/online enrolment by farmers etc. Upto
|
77 |
+
last date of enrolment of farmers as notified by States for notified crop(s)
|
78 |
+
or up to 15th July* for Kharif season Upto last date of enrolment of farmers as
|
79 |
+
notified by States for notified crop(s) or up to 15th December* for Rabi season Cut-off
|
80 |
+
date for receipt of Applications of farmers/debit of premium from farmers account
|
81 |
+
(loanee and non- loanee) by all stakeholders including banks/PACS/CSC/ insurance
|
82 |
+
agent/online enrolment by farmers etc. Note: *This is indicative only and district
|
83 |
+
wise crop calendar will be the final basis to arrive at cut off date 9 Declaration
|
84 |
+
of Prevented sowing State Govt./ IC Strictly within 15 days from cut off date
|
85 |
+
for enrolment of farmers i.e 31st July for Kharif and 31st Dec for Rabi 10 Banks/Portal Within
|
86 |
+
15 days of cut-off date for enrolment of farmers/debit of premium for both loanee and non-
|
87 |
+
loanee farmers i.e. 31st July for Kharif and 31st Dec for Rabi Cut-off date
|
88 |
+
for electronic remittance of premium along with consolidated Declarations to
|
89 |
+
respective Insurance Company and uploading of details of individual covered farmers
|
90 |
+
on crop insurance Portal by Bank branches (CBs/ RRBs/DCCBs/PACs), followed by SMS
|
91 |
+
to all insured farmers from Portal 11 Within 48 Hours of receipt of application
|
92 |
+
& premium. Insurance companies and their agents Cut-off date for electronic
|
93 |
+
remittance of farmer premium to Insurance Companies for farmers covered on Voluntary
|
94 |
+
basis by designated insurance Agent(s) and uploading of details of individual
|
95 |
+
covered farmers on crop insurance Portal. Insurance Companies 12 Cut-off date
|
96 |
+
for Insurer to accept or reject the farmer''s data on Portal Within 15 days from
|
97 |
+
the cut-off date for uploading of data/information by Banks/PACS/CSC/Agent respectively.'''
|
98 |
+
- ''' This consent of the beneficiary should be \''to agree that the department
|
99 |
+
responsible for implementation of the Pradhan Mantri Kisan Samman Nidhi Yojana
|
100 |
+
in Union Government or the State Government / Union Territory Administration can
|
101 |
+
use the beneficiary Aadhaar number and other information provided in the declaration
|
102 |
+
to verify the eligibility of the beneficiary for scheme as per extant scheme guidelines
|
103 |
+
with the concerned agencies\''. 6.3 The existing land-ownership system in the
|
104 |
+
concerned State / UT will be used for identification of beneficiaries. Accordingly,
|
105 |
+
it is of utmost importance that the land records are clear and updated. Further,
|
106 |
+
State / UT Governments would also expedite the progress of digitization of
|
107 |
+
the land records and linking the same with Aadhaar as well as bank details of
|
108 |
+
the beneficiaries. 6.4 The lists of eligible beneficiaries would be published
|
109 |
+
at the village level. Farmers'' families who are eligible but have been excluded
|
110 |
+
should be provided an opportunity to represent their case.'''
|
111 |
+
- source_sentence: What are weather parameters related to crop condition?
|
112 |
+
sentences:
|
113 |
+
- '''i. \''Credit Facility\'' means any fund based credit facility extended by
|
114 |
+
an Eligible Lending Institution (ELI) to an Eligible Borrower without any Collateral
|
115 |
+
Security or Third Party Guarantee ; ii. \''Credit Guarantee Fund\'' means the
|
116 |
+
Credit Guarantee Fund for FPOs created with NABARD and NCDC respectively under
|
117 |
+
the Scheme with matching grant from DAC&FW for the purpose of extending guarantee
|
118 |
+
to the eligible lending institution(s) against their collateral free lending to eligible
|
119 |
+
FPOs; iii. \''Eligible Lending Institution (ELI)\'' means a Scheduled Commercial
|
120 |
+
Bank for the time being included in the second Schedule to the Reserve Bank of
|
121 |
+
India Act, 1934, Regional Rural Banks, Co-operative Banks, Cooperative Credit Society,
|
122 |
+
NEDFI, or any other institution (s) as may be decided by the NABARD and/or NCDC,
|
123 |
+
as the case may be, in consultation with Government of India from time to time.
|
124 |
+
NABARD and NCDC can also finance, if they so desire with the approval of DAC&FW/N-PMFSC.
|
125 |
+
NBFCs and such other financing institutions with required net worth and track
|
126 |
+
record may also serve as Eligible Lending Institutions (ELIs), for lending to
|
127 |
+
FPOs with a moderate spread between their cost of capital and lending rate. However,
|
128 |
+
Standard Financial Sector Rating Agency should have rated NBFC **to be AAA**
|
129 |
+
to be considered as ELI; iv. \''Guarantee Cover\'' means maximum cover available
|
130 |
+
per eligible FPO borrower; v. \''Guarantee Fee\'' means the onetime fee at
|
131 |
+
a specified rate of the eligible credit facility sanctioned by the ELI, payable
|
132 |
+
by the ELI to NABARD or NCDC, as the case may be; and vi.'''
|
133 |
+
- '''| Table No. |
|
134 |
+
Topic |\n|--------------------------------------------------------------------------------------|-----------------------------------------------------------------------------|\n|
|
135 |
+
Para | |\n|
|
136 |
+
Number | |\n|
|
137 |
+
Table 1 |
|
138 |
+
Premium rate payable by the farmer |\n|
|
139 |
+
Table 2 |
|
140 |
+
Seasonality Discipline |\n|
|
141 |
+
Table 3 |
|
142 |
+
Calculation of crop-wise Sum Insured |\n|
|
143 |
+
Table 4 |
|
144 |
+
Number of CCEs to be conducted at IU level |\n|
|
145 |
+
Table 5 |
|
146 |
+
Yield Calculation for multi-picking crop |\n|
|
147 |
+
Table 6 | |\n|
|
148 |
+
Types of resolution to be used for vegetation index derivation for different | |\n|
|
149 |
+
levels of analysis | |\n|
|
150 |
+
19.7 | |\n|
|
151 |
+
Table 7 |
|
152 |
+
Calculation of Threshold Yield |\n|
|
153 |
+
Table 8 | |\n|
|
154 |
+
Detailed Procedure for On Account Payment of Claims due to Mid-Season | |\n|
|
155 |
+
Adversity | |\n|
|
156 |
+
21.2.7.2 | |\n|
|
157 |
+
Table 9 | |\n|
|
158 |
+
Detailed Procedure and Timelines for payment of Prevented / Failed Sowing | |\n|
|
159 |
+
and Prevented Planting / Germination Claims | |\n|
|
160 |
+
21.3.6.1 | |\n|
|
161 |
+
Table 10 | |\n|
|
162 |
+
Detailed Procedure and Timelines for payment of | |\n|
|
163 |
+
Post Harvest Loss | |\n| | |\n|
|
164 |
+
21.4.8.1 | |\n|
|
165 |
+
Table 11 | |\n|
|
166 |
+
Detailed Procedure and Timelines for payment of | |\n|
|
167 |
+
localized claims | |\n| | |\n|
|
168 |
+
21.5.9.1 | |\n|
|
169 |
+
Table 12 |
|
170 |
+
Criteria for Performance Evaluation and Ranking of Loss Assessment Agencies |\n|
|
171 |
+
Table 13 | |\n|
|
172 |
+
Illustration for classification of risks, clustering/clubbing of risks and districts
|
173 |
+
| |\n|
|
174 |
+
and determination of L1 bidder | |\n|
|
175 |
+
Annexure 1 | |\n|
|
176 |
+
Table 14 | |\n|
|
177 |
+
Penalty and performance Monitoring/Evaluation of Empanelled Insurance | |\n|
|
178 |
+
Companies | |\n|
|
179 |
+
Annexure 2 | |\n|
|
180 |
+
Table 15 |
|
181 |
+
Illustrative Evaluation of Efficiency of Nodal Department of State |'''
|
182 |
+
- '''gridded or satellite based data, Dry-spell Occurrence, Temperature Anomaly,
|
183 |
+
Soil Moisture Analysis (either from satellite or model data) and any other available
|
184 |
+
weather parameters (related to crop condition), either from ground or satellite
|
185 |
+
data. Another rainfall based index, which can also be studied, is Standardized
|
186 |
+
Precipitation Index (SPI). SPI can be developed using IMD gridded rainfall data
|
187 |
+
or NOAA CPC rainfall data. However, while using gridded data (either from satellite
|
188 |
+
or ground stations), appropriate resolution should be used at appropriate level.
|
189 |
+
For example, 0.25 degree data should be used only at district level and not at
|
190 |
+
block/village level. High resolution weather data can also be sourced from weather
|
191 |
+
companies.'''
|
192 |
+
- source_sentence: How is the amount realized by the ELI from the FPC shared between
|
193 |
+
SFAC and ELI?
|
194 |
+
sentences:
|
195 |
+
- '''To substantiate the fact, the most successful example is of dairy co-operative
|
196 |
+
in India where professional managers have contributed immensely to make it a success.
|
197 |
+
There are other so many examples which prove the absolute requirement of professional
|
198 |
+
managers. The number of professional staff could depend on geographical spread
|
199 |
+
of business operation, diversity of activities and volume of business. However,
|
200 |
+
an FPO should have minimum a CEO/Manager and an Accountant. Accountant is required
|
201 |
+
in FPO to look after its day to day accounting work. Based on requirement, FPO
|
202 |
+
can engage other staff also. 10.3 The CEO/Manager is to be appointed by the
|
203 |
+
executive body of the FPO who should be either graduate in agriculture / agriculture
|
204 |
+
marketing / agri-business management or BBA or equivalent. Locally available professionals
|
205 |
+
with 10+2 and preferably diploma in agriculture / agriculture marketing / agri-business
|
206 |
+
management or in such other related areas may be preferable. The accountant should
|
207 |
+
have educational qualification of 10+2 with Mathematics as a compulsory subject
|
208 |
+
or alternatively with Commerce or Accountancy background. If any members of the
|
209 |
+
FPO meet the above criteria, they may be considered preferably in the selection
|
210 |
+
process. 10.4 Under the scheme, financial support towards salary of CEO/Manager
|
211 |
+
up to @ Rs. 25,000/- per month and of Accountant up to @ Rs.10,000/- per
|
212 |
+
month with annual increment up to 5% is to be provided from the earmarked financial
|
213 |
+
support for first 3 years only. Thereafter, FPOs will manage from their own resources
|
214 |
+
to pay the salary of CEO/Manager and Accountant. In order to create interest
|
215 |
+
of good professional activities of CEO/Accountant, the FPO may also offer higher
|
216 |
+
payment with their own sources of funds on above of Govt. support. One CEO will
|
217 |
+
provide full time services to one FPO at a time only.'''
|
218 |
+
- '''7.3.1 Three levels of Indemnity, viz., 70%, 80% and 90%. SLCCCI in consultation
|
219 |
+
with Insurance Companies shall approve indemnity levels for notified crops at
|
220 |
+
district level. Threshold Yield (TY) shall be notified in the Tender for the
|
221 |
+
current season and the same will be used for claim calculation for that season.
|
222 |
+
The Average Yield of a notified crop in Insurance Unit (IU) will be average yield
|
223 |
+
of best five years out of last seven years. The Threshold yield of the notified
|
224 |
+
crop is equal to Average Yield multiplied by Indemnity level. The Threshold Yield
|
225 |
+
for any crop and IU shall compulsorily be part of the notification for the season
|
226 |
+
and shall not change at any point during that season. 7.3.2 Calculation and
|
227 |
+
Notification of Threshold Yield: For calculation of Threshold Yield, historical
|
228 |
+
average yield of best five out of last seven years shall be considered. Further
|
229 |
+
Threshold Yield should be defined only at notified area level and once notified
|
230 |
+
in the Notification issued by the State should not be changed at later stage
|
231 |
+
under any circumstances. In case of multi-year contract, the Threshold Yield for the
|
232 |
+
subsequent years shall be revised by adding/considering the yields of immediate
|
233 |
+
previous corresponding season. The revised TY and Sum Insured (if revised) should
|
234 |
+
be notified accordingly at the beginning of each crop season in case of multi-year
|
235 |
+
tender.'''
|
236 |
+
- '''a. Only such Claim as is submitted by the ELI within a maximum period of one
|
237 |
+
year from date of NPA or as specified by SFAC from time to time, shall be considered
|
238 |
+
by SFAC. b. On receiving a claim, the I&CSC shall review in detail the reasons
|
239 |
+
for the Default. The Committee reserves the right to reject any proposal where
|
240 |
+
the Guidelines have not been strictly followed or if any misrepresentation or
|
241 |
+
concealment of facts is found leading to undue favour to the concerned FPC. c.
|
242 |
+
SFAC shall honour 75 per cent of the Guaranteed Amount in Default subject to a maximum
|
243 |
+
of 75 per cent of the guaranteed cap amount, on submission of claim by the ELI
|
244 |
+
where appropriate action for recovery has been initiated. The balance 25 per cent
|
245 |
+
of the default or guaranteed cap amount, as the case may be, shall be paid on
|
246 |
+
conclusion of recovery proceedings by ELI. d. SFAC shall pay claims found in
|
247 |
+
order and complete in all respects, within 90 days. e. The outstanding dues of
|
248 |
+
the FPC to ELI shall be reduced to the extent of the claim amount settled by
|
249 |
+
SFAC. f. The ELI shall continue to make efforts to realise the balance amount
|
250 |
+
due from the g. defaulting FPC even after settlement of the Guarantee. h. SFAC
|
251 |
+
has the right to claim from the ELI any amount that is realised by the ELI from the
|
252 |
+
defaulting FPC even after settlement of the guarantee amount. i. Any amount realized
|
253 |
+
by the ELI from the FPC shall be shared in the ratio of 85%:15% between SFAC
|
254 |
+
& ELI.'''
|
255 |
+
- source_sentence: What directions is the lending institution bound to comply with?
|
256 |
+
sentences:
|
257 |
+
- ''' The I&CSC shall regularly keep the NABARD and NCDC, as the case may be, informed
|
258 |
+
about all major decisions and actions taken by it in this regard, and shall work
|
259 |
+
under its overall control and guidance with regard to the Fund and the Scheme.
|
260 |
+
Simultaneously, the NABARD and NCDC will keep the N- PMAFSC apprised about such
|
261 |
+
decisions and actions taken in this regard. v. Decisions regarding Guarantee
|
262 |
+
Pay-Outs shall be the primary responsibility of I&CSC which shall meet at least
|
263 |
+
once every quarter or as often as necessary. vi. The ELI may invoke the Guarantee
|
264 |
+
in respect of Credit Facility within a maximum period of one year from the date
|
265 |
+
of NPA, if the conditions set out by NABARD or NCDC, as the case may be , are
|
266 |
+
met out .'''
|
267 |
+
- ''' The lending institution shall be bound to comply with such directions as NABARD
|
268 |
+
or NCDC, as the case may be, may deem fit to issue from time to time, for facilitating
|
269 |
+
recoveries of the guaranteed account, or safeguarding its interest as a guarantor. ix. The
|
270 |
+
lending institution shall, in particular, refrain from any act either before or
|
271 |
+
subsequent to invocation of guarantee, which may adversely affect the interest
|
272 |
+
of NABARD or NCDC, as the case may be, as the guarantor. x. The lending institution
|
273 |
+
shall be bound under the Scheme to intimate in advance to NABARD or NCDC, as the
|
274 |
+
case may be, its intention to enter into any compromise or arrangement, which
|
275 |
+
may have effect of discharge or waiver of primary security. xi. Further, the
|
276 |
+
lending institution shall secure for NABARD or NCDC, as the case may be, or
|
277 |
+
its appointed agency, through a stipulation in an Agreement with the Borrower
|
278 |
+
or otherwise, the right to list the defaulted Borrowers'' names and particulars
|
279 |
+
on the Website of NABARD or NCDC, as the case may be or Integrated Portal'''
|
280 |
+
- '''| AIC | Agricultural Insurance Company of India Ltd |\n|--------|----------------------------------------------------------------------------------------------|\n|
|
281 |
+
ACF | Area Correction Factor |\n|
|
282 |
+
APR | Actuarial Premium Rate |\n|
|
283 |
+
ARG | Automatic Rain Gauge |\n|
|
284 |
+
AWS | Automatic Weather Stations |\n|
|
285 |
+
AY | Actual Yield |\n|
|
286 |
+
CB | Commercial Banks |\n|
|
287 |
+
CBS | Core Banking Solution |\n|
|
288 |
+
CCAFS | Research program on Climate Change, Agriculture and Food Security |\n|
|
289 |
+
CCEs | Crop Cutting Experiments |\n|
|
290 |
+
CPMU | Central Program Management Unit |\n|
|
291 |
+
CSC | Common Service Center |\n|
|
292 |
+
CSO | Central Statistical Office |\n|
|
293 |
+
CV | Co-efficient of Variance |\n|
|
294 |
+
DAC&FW | Department of Agriculture, Cooperation and Farmers Welfare |\n|
|
295 |
+
DBT | Direct Benefit Transfer |\n|
|
296 |
+
DCCBs | District Central Cooperative Banks |\n|
|
297 |
+
DLMC | District Level Monitoring Committee |\n|
|
298 |
+
DLTC | District Level Technical Committee |\n|
|
299 |
+
ESI | Expected Sum Insured |\n|
|
300 |
+
FASAL | Forecasting Agricultural output using Space, Agro meteorological and
|
301 |
+
Land based observations |\n| FIs | Financial Institutions |\n|
|
302 |
+
GIC Re | General Insurance Corporation of India |\n|
|
303 |
+
GFR | General Financial Rule |\n|
|
304 |
+
GIS | Geographic Information System |\n|
|
305 |
+
GPS | Global Positioning System |\n|
|
306 |
+
IA | Implementing Agency |\n|
|
307 |
+
IC | Insurance Company |\n|
|
308 |
+
IASRI | Indian Agricultural Statistical Research Institute |\n|
|
309 |
+
IFPRI | International Food Policy Research Institute |\n|
|
310 |
+
IMD | Indian Meteorological Department |\n|
|
311 |
+
IRRI | International Rice Research Institute |\n|
|
312 |
+
IRDAI | Insurance Regulatory and Development Authority of India |\n|
|
313 |
+
ISRO | Indian Space Research Organisation |\n|
|
314 |
+
ISS | Interest Subvention Scheme |\n|
|
315 |
+
IT | Information Technology |\n|
|
316 |
+
IU | Insurance Unit |\n|
|
317 |
+
KCC | Kisan Credit Cards |\n|
|
318 |
+
LC | Loss Cost |\n|
|
319 |
+
LPA | Long period Average |\n|
|
320 |
+
LPC | Land Possession Certificate |\n|
|
321 |
+
MIS | Management Information System |\n|
|
322 |
+
MNCFC | Mahalanobis National Crop Forecast Centre |\n|
|
323 |
+
MOA&FW | Ministry of Agriculture and Farmers Welfare |'''
|
324 |
+
- source_sentence: How can we identify outliers in crop yield?
|
325 |
+
sentences:
|
326 |
+
- '''(i) It will closely monitor and review the progress of FPO development and functioning
|
327 |
+
by holding its regular meetings. (ii) It will suggest the potential produce
|
328 |
+
clusters in the district (where FPOs can be formed & promoted) to N-PMAFSC and
|
329 |
+
will also assist Implementing Agencies, CBBOs and other stakeholders in identification
|
330 |
+
of cluster(s) and activity (ies) and also in mobilization of farmers. (iii) It
|
331 |
+
will resolve the financial constraints of FPOs through District Level Bankers'' Committee
|
332 |
+
and provide feedback to N-PMAFSC. (iv) It will identify the constraints in implementation
|
333 |
+
of scheme at the ground level and communicate the same to State Level Consultative
|
334 |
+
Committee for further taking up the matter with DAC&FW and N-PMAFSC for appropriate
|
335 |
+
policy decision. (v) Any other matter so decided by the committee in the interest
|
336 |
+
of the scheme and farmers. 14.4 Close and effective monitoring has been considered
|
337 |
+
a major trigger for success of this scheme. Therefore, in addition to three tiered
|
338 |
+
afore-stated structured mechanism for monitoring of the scheme, there shall be
|
339 |
+
continuous in-house monitoring by DAC&FW and by the Implementing Agencies also.
|
340 |
+
The DAC&FW may utilize the services of Directorate of Marketing & Inspection (DMI),
|
341 |
+
which has existence through its Regional & Sub-Offices across the country. For
|
342 |
+
effective monitoring, DAC&FW may engage consultants also and cost for same will
|
343 |
+
be borne from the budget of the scheme itself.'''
|
344 |
+
- '''Under CGF, NABARD and NCDC, as the case may be, shall cover: i. Fund based
|
345 |
+
Credit facilities already sanctioned / extended within six months from the date
|
346 |
+
of the application for the Guarantee Cover or intended to be extended singly or
|
347 |
+
jointly by one or more than one Eligible Lending Institution (ELI) to a single
|
348 |
+
eligible FPO borrower by way of term loan and/or working capital/composite credit
|
349 |
+
facilities without any collateral security and/or third party guarantees. ii.
|
350 |
+
The ELI can extend credit without any limit; however, the Guarantee Cover shall be
|
351 |
+
limited to the maximum guarantee cover specified under the Scheme. iii. Non-Banking
|
352 |
+
Financial Companies (NBFCs) and such other Financial Institutions (FIs) with
|
353 |
+
required net worth, track record and rating of AAA may also be accommodated
|
354 |
+
as Eligible Lending Institutions (ELIs), such NBFC should on-ward lend to FPOs
|
355 |
+
with a moderate spread between their cost of capital and lending rate.'''
|
356 |
+
- '''Identification of Outliers: All these above analyses can be used to check whether
|
357 |
+
there was any reason for yield deviation as presented in the CCE data. Then a
|
358 |
+
yield proxy map may be prepared. The Yield proxy map can be derived from remote
|
359 |
+
sensing vegetation indices (single or combination of indices), crop simulation
|
360 |
+
model output, or an integration of various parameters, which are related to crop
|
361 |
+
yield, such as soil, weather (gridded), satellite based products, etc. Whatever,
|
362 |
+
yield proxies to be used, it is the responsibility of the organization to record documentary
|
363 |
+
evidence (from their or other''s published work) that the yield proxy is related
|
364 |
+
to the particular crop''s yield. Then the IU level yields need to be overlaid
|
365 |
+
on the yield proxy map. Both yield proxy and CCE yield can be divided into 4-5
|
366 |
+
categories (e.g. Very good, Good, Medium, Poor, Very poor). Wherever there is
|
367 |
+
large mismatch between yield proxy and the CCE yield (more than 2 levels), the
|
368 |
+
CCE yield for that IU can be considered, as outliers.'''
|
369 |
+
pipeline_tag: sentence-similarity
|
370 |
+
model-index:
|
371 |
+
- name: SentenceTransformer based on BAAI/bge-small-en-v1.5
|
372 |
+
results:
|
373 |
+
- task:
|
374 |
+
type: information-retrieval
|
375 |
+
name: Information Retrieval
|
376 |
+
dataset:
|
377 |
+
name: val evaluator
|
378 |
+
type: val_evaluator
|
379 |
+
metrics:
|
380 |
+
- type: cosine_accuracy@1
|
381 |
+
value: 0.48
|
382 |
+
name: Cosine Accuracy@1
|
383 |
+
- type: cosine_accuracy@5
|
384 |
+
value: 0.88
|
385 |
+
name: Cosine Accuracy@5
|
386 |
+
- type: cosine_accuracy@10
|
387 |
+
value: 0.95
|
388 |
+
name: Cosine Accuracy@10
|
389 |
+
- type: cosine_precision@1
|
390 |
+
value: 0.48
|
391 |
+
name: Cosine Precision@1
|
392 |
+
- type: cosine_precision@5
|
393 |
+
value: 0.17599999999999993
|
394 |
+
name: Cosine Precision@5
|
395 |
+
- type: cosine_precision@10
|
396 |
+
value: 0.09499999999999999
|
397 |
+
name: Cosine Precision@10
|
398 |
+
- type: cosine_recall@1
|
399 |
+
value: 0.48
|
400 |
+
name: Cosine Recall@1
|
401 |
+
- type: cosine_recall@5
|
402 |
+
value: 0.88
|
403 |
+
name: Cosine Recall@5
|
404 |
+
- type: cosine_recall@10
|
405 |
+
value: 0.95
|
406 |
+
name: Cosine Recall@10
|
407 |
+
- type: cosine_ndcg@5
|
408 |
+
value: 0.7065704999222873
|
409 |
+
name: Cosine Ndcg@5
|
410 |
+
- type: cosine_ndcg@10
|
411 |
+
value: 0.7288298734374183
|
412 |
+
name: Cosine Ndcg@10
|
413 |
+
- type: cosine_ndcg@100
|
414 |
+
value: 0.7407135931762043
|
415 |
+
name: Cosine Ndcg@100
|
416 |
+
- type: cosine_mrr@5
|
417 |
+
value: 0.6475
|
418 |
+
name: Cosine Mrr@5
|
419 |
+
- type: cosine_mrr@10
|
420 |
+
value: 0.6564841269841272
|
421 |
+
name: Cosine Mrr@10
|
422 |
+
- type: cosine_mrr@100
|
423 |
+
value: 0.6595216619129662
|
424 |
+
name: Cosine Mrr@100
|
425 |
+
- type: cosine_map@100
|
426 |
+
value: 0.6595216619129661
|
427 |
+
name: Cosine Map@100
|
428 |
+
- type: dot_accuracy@1
|
429 |
+
value: 0.48
|
430 |
+
name: Dot Accuracy@1
|
431 |
+
- type: dot_accuracy@5
|
432 |
+
value: 0.88
|
433 |
+
name: Dot Accuracy@5
|
434 |
+
- type: dot_accuracy@10
|
435 |
+
value: 0.95
|
436 |
+
name: Dot Accuracy@10
|
437 |
+
- type: dot_precision@1
|
438 |
+
value: 0.48
|
439 |
+
name: Dot Precision@1
|
440 |
+
- type: dot_precision@5
|
441 |
+
value: 0.17599999999999993
|
442 |
+
name: Dot Precision@5
|
443 |
+
- type: dot_precision@10
|
444 |
+
value: 0.09499999999999999
|
445 |
+
name: Dot Precision@10
|
446 |
+
- type: dot_recall@1
|
447 |
+
value: 0.48
|
448 |
+
name: Dot Recall@1
|
449 |
+
- type: dot_recall@5
|
450 |
+
value: 0.88
|
451 |
+
name: Dot Recall@5
|
452 |
+
- type: dot_recall@10
|
453 |
+
value: 0.95
|
454 |
+
name: Dot Recall@10
|
455 |
+
- type: dot_ndcg@5
|
456 |
+
value: 0.7065704999222873
|
457 |
+
name: Dot Ndcg@5
|
458 |
+
- type: dot_ndcg@10
|
459 |
+
value: 0.7288298734374183
|
460 |
+
name: Dot Ndcg@10
|
461 |
+
- type: dot_ndcg@100
|
462 |
+
value: 0.7407135931762043
|
463 |
+
name: Dot Ndcg@100
|
464 |
+
- type: dot_mrr@5
|
465 |
+
value: 0.6475
|
466 |
+
name: Dot Mrr@5
|
467 |
+
- type: dot_mrr@10
|
468 |
+
value: 0.6564841269841272
|
469 |
+
name: Dot Mrr@10
|
470 |
+
- type: dot_mrr@100
|
471 |
+
value: 0.6595216619129662
|
472 |
+
name: Dot Mrr@100
|
473 |
+
- type: dot_map@100
|
474 |
+
value: 0.6595216619129661
|
475 |
+
name: Dot Map@100
|
476 |
+
---
|
477 |
+
|
478 |
+
# SentenceTransformer based on BAAI/bge-small-en-v1.5
|
479 |
+
|
480 |
+
This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [BAAI/bge-small-en-v1.5](https://huggingface.co/BAAI/bge-small-en-v1.5). It maps sentences & paragraphs to a 384-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
|
481 |
+
|
482 |
+
## Model Details
|
483 |
+
|
484 |
+
### Model Description
|
485 |
+
- **Model Type:** Sentence Transformer
|
486 |
+
- **Base model:** [BAAI/bge-small-en-v1.5](https://huggingface.co/BAAI/bge-small-en-v1.5) <!-- at revision 5c38ec7c405ec4b44b94cc5a9bb96e735b38267a -->
|
487 |
+
- **Maximum Sequence Length:** 512 tokens
|
488 |
+
- **Output Dimensionality:** 384 dimensions
|
489 |
+
- **Similarity Function:** Cosine Similarity
|
490 |
+
<!-- - **Training Dataset:** Unknown -->
|
491 |
+
<!-- - **Language:** Unknown -->
|
492 |
+
<!-- - **License:** Unknown -->
|
493 |
+
|
494 |
+
### Model Sources
|
495 |
+
|
496 |
+
- **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
|
497 |
+
- **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
|
498 |
+
- **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
|
499 |
+
|
500 |
+
### Full Model Architecture
|
501 |
+
|
502 |
+
```
|
503 |
+
SentenceTransformer(
|
504 |
+
(0): Transformer({'max_seq_length': 512, 'do_lower_case': True}) with Transformer model: BertModel
|
505 |
+
(1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': True, 'pooling_mode_mean_tokens': False, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
|
506 |
+
(2): Normalize()
|
507 |
+
)
|
508 |
+
```
|
509 |
+
|
510 |
+
## Usage
|
511 |
+
|
512 |
+
### Direct Usage (Sentence Transformers)
|
513 |
+
|
514 |
+
First install the Sentence Transformers library:
|
515 |
+
|
516 |
+
```bash
|
517 |
+
pip install -U sentence-transformers
|
518 |
+
```
|
519 |
+
|
520 |
+
Then you can load this model and run inference.
|
521 |
+
```python
|
522 |
+
from sentence_transformers import SentenceTransformer
|
523 |
+
|
524 |
+
# Download from the 🤗 Hub
|
525 |
+
model = SentenceTransformer("smokxy/embedding_finetuned")
|
526 |
+
# Run inference
|
527 |
+
sentences = [
|
528 |
+
'How can we identify outliers in crop yield?',
|
529 |
+
"'Identification of Outliers: All these above analyses can be used to check whether there was any reason for yield deviation as presented in the CCE data. Then a yield proxy map may be prepared. The Yield proxy map can be derived from remote sensing vegetation indices (single or combination of indices), crop simulation model output, or an integration of various parameters, which are related to crop yield, such as soil, weather (gridded), satellite based products, etc. Whatever, yield proxies to be used, it is the responsibility of the organization to record documentary evidence (from their or other's published work) that the yield proxy is related to the particular crop's yield. Then the IU level yields need to be overlaid on the yield proxy map. Both yield proxy and CCE yield can be divided into 4-5 categories (e.g. Very good, Good, Medium, Poor, Very poor). Wherever there is large mismatch between yield proxy and the CCE yield (more than 2 levels), the CCE yield for that IU can be considered, as outliers.'",
|
530 |
+
"'Under CGF, NABARD and NCDC, as the case may be, shall cover: i. Fund based Credit facilities already sanctioned / extended within six months from the date of the application for the Guarantee Cover or intended to be extended singly or jointly by one or more than one Eligible Lending Institution (ELI) to a single eligible FPO borrower by way of term loan and/or working capital/composite credit facilities without any collateral security and/or third party guarantees. ii. The ELI can extend credit without any limit; however, the Guarantee Cover shall be limited to the maximum guarantee cover specified under the Scheme. iii. Non-Banking Financial Companies (NBFCs) and such other Financial Institutions (FIs) with required net worth, track record and rating of AAA may also be accommodated as Eligible Lending Institutions (ELIs), such NBFC should on-ward lend to FPOs with a moderate spread between their cost of capital and lending rate.'",
|
531 |
+
]
|
532 |
+
embeddings = model.encode(sentences)
|
533 |
+
print(embeddings.shape)
|
534 |
+
# [3, 384]
|
535 |
+
|
536 |
+
# Get the similarity scores for the embeddings
|
537 |
+
similarities = model.similarity(embeddings, embeddings)
|
538 |
+
print(similarities.shape)
|
539 |
+
# [3, 3]
|
540 |
+
```
|
541 |
+
|
542 |
+
<!--
|
543 |
+
### Direct Usage (Transformers)
|
544 |
+
|
545 |
+
<details><summary>Click to see the direct usage in Transformers</summary>
|
546 |
+
|
547 |
+
</details>
|
548 |
+
-->
|
549 |
+
|
550 |
+
<!--
|
551 |
+
### Downstream Usage (Sentence Transformers)
|
552 |
+
|
553 |
+
You can finetune this model on your own dataset.
|
554 |
+
|
555 |
+
<details><summary>Click to expand</summary>
|
556 |
+
|
557 |
+
</details>
|
558 |
+
-->
|
559 |
+
|
560 |
+
<!--
|
561 |
+
### Out-of-Scope Use
|
562 |
+
|
563 |
+
*List how the model may foreseeably be misused and address what users ought not to do with the model.*
|
564 |
+
-->
|
565 |
+
|
566 |
+
## Evaluation
|
567 |
+
|
568 |
+
### Metrics
|
569 |
+
|
570 |
+
#### Information Retrieval
|
571 |
+
* Dataset: `val_evaluator`
|
572 |
+
* Evaluated with [<code>InformationRetrievalEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator)
|
573 |
+
|
574 |
+
| Metric | Value |
|
575 |
+
|:--------------------|:-----------|
|
576 |
+
| cosine_accuracy@1 | 0.48 |
|
577 |
+
| cosine_accuracy@5 | 0.88 |
|
578 |
+
| cosine_accuracy@10 | 0.95 |
|
579 |
+
| cosine_precision@1 | 0.48 |
|
580 |
+
| cosine_precision@5 | 0.176 |
|
581 |
+
| cosine_precision@10 | 0.095 |
|
582 |
+
| cosine_recall@1 | 0.48 |
|
583 |
+
| cosine_recall@5 | 0.88 |
|
584 |
+
| cosine_recall@10 | 0.95 |
|
585 |
+
| cosine_ndcg@5 | 0.7066 |
|
586 |
+
| cosine_ndcg@10 | 0.7288 |
|
587 |
+
| cosine_ndcg@100 | 0.7407 |
|
588 |
+
| cosine_mrr@5 | 0.6475 |
|
589 |
+
| cosine_mrr@10 | 0.6565 |
|
590 |
+
| cosine_mrr@100 | 0.6595 |
|
591 |
+
| **cosine_map@100** | **0.6595** |
|
592 |
+
| dot_accuracy@1 | 0.48 |
|
593 |
+
| dot_accuracy@5 | 0.88 |
|
594 |
+
| dot_accuracy@10 | 0.95 |
|
595 |
+
| dot_precision@1 | 0.48 |
|
596 |
+
| dot_precision@5 | 0.176 |
|
597 |
+
| dot_precision@10 | 0.095 |
|
598 |
+
| dot_recall@1 | 0.48 |
|
599 |
+
| dot_recall@5 | 0.88 |
|
600 |
+
| dot_recall@10 | 0.95 |
|
601 |
+
| dot_ndcg@5 | 0.7066 |
|
602 |
+
| dot_ndcg@10 | 0.7288 |
|
603 |
+
| dot_ndcg@100 | 0.7407 |
|
604 |
+
| dot_mrr@5 | 0.6475 |
|
605 |
+
| dot_mrr@10 | 0.6565 |
|
606 |
+
| dot_mrr@100 | 0.6595 |
|
607 |
+
| dot_map@100 | 0.6595 |
|
608 |
+
|
609 |
+
<!--
|
610 |
+
## Bias, Risks and Limitations
|
611 |
+
|
612 |
+
*What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
|
613 |
+
-->
|
614 |
+
|
615 |
+
<!--
|
616 |
+
### Recommendations
|
617 |
+
|
618 |
+
*What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
|
619 |
+
-->
|
620 |
+
|
621 |
+
## Training Details
|
622 |
+
|
623 |
+
### Training Hyperparameters
|
624 |
+
#### Non-Default Hyperparameters
|
625 |
+
|
626 |
+
- `eval_strategy`: steps
|
627 |
+
- `gradient_accumulation_steps`: 4
|
628 |
+
- `learning_rate`: 1e-05
|
629 |
+
- `weight_decay`: 0.01
|
630 |
+
- `num_train_epochs`: 1.0
|
631 |
+
- `warmup_ratio`: 0.1
|
632 |
+
- `load_best_model_at_end`: True
|
633 |
+
|
634 |
+
#### All Hyperparameters
|
635 |
+
<details><summary>Click to expand</summary>
|
636 |
+
|
637 |
+
- `overwrite_output_dir`: False
|
638 |
+
- `do_predict`: False
|
639 |
+
- `eval_strategy`: steps
|
640 |
+
- `prediction_loss_only`: True
|
641 |
+
- `per_device_train_batch_size`: 8
|
642 |
+
- `per_device_eval_batch_size`: 8
|
643 |
+
- `per_gpu_train_batch_size`: None
|
644 |
+
- `per_gpu_eval_batch_size`: None
|
645 |
+
- `gradient_accumulation_steps`: 4
|
646 |
+
- `eval_accumulation_steps`: None
|
647 |
+
- `learning_rate`: 1e-05
|
648 |
+
- `weight_decay`: 0.01
|
649 |
+
- `adam_beta1`: 0.9
|
650 |
+
- `adam_beta2`: 0.999
|
651 |
+
- `adam_epsilon`: 1e-08
|
652 |
+
- `max_grad_norm`: 1.0
|
653 |
+
- `num_train_epochs`: 1.0
|
654 |
+
- `max_steps`: -1
|
655 |
+
- `lr_scheduler_type`: linear
|
656 |
+
- `lr_scheduler_kwargs`: {}
|
657 |
+
- `warmup_ratio`: 0.1
|
658 |
+
- `warmup_steps`: 0
|
659 |
+
- `log_level`: passive
|
660 |
+
- `log_level_replica`: warning
|
661 |
+
- `log_on_each_node`: True
|
662 |
+
- `logging_nan_inf_filter`: True
|
663 |
+
- `save_safetensors`: True
|
664 |
+
- `save_on_each_node`: False
|
665 |
+
- `save_only_model`: False
|
666 |
+
- `restore_callback_states_from_checkpoint`: False
|
667 |
+
- `no_cuda`: False
|
668 |
+
- `use_cpu`: False
|
669 |
+
- `use_mps_device`: False
|
670 |
+
- `seed`: 42
|
671 |
+
- `data_seed`: None
|
672 |
+
- `jit_mode_eval`: False
|
673 |
+
- `use_ipex`: False
|
674 |
+
- `bf16`: False
|
675 |
+
- `fp16`: False
|
676 |
+
- `fp16_opt_level`: O1
|
677 |
+
- `half_precision_backend`: auto
|
678 |
+
- `bf16_full_eval`: False
|
679 |
+
- `fp16_full_eval`: False
|
680 |
+
- `tf32`: None
|
681 |
+
- `local_rank`: 0
|
682 |
+
- `ddp_backend`: None
|
683 |
+
- `tpu_num_cores`: None
|
684 |
+
- `tpu_metrics_debug`: False
|
685 |
+
- `debug`: []
|
686 |
+
- `dataloader_drop_last`: False
|
687 |
+
- `dataloader_num_workers`: 0
|
688 |
+
- `dataloader_prefetch_factor`: None
|
689 |
+
- `past_index`: -1
|
690 |
+
- `disable_tqdm`: False
|
691 |
+
- `remove_unused_columns`: True
|
692 |
+
- `label_names`: None
|
693 |
+
- `load_best_model_at_end`: True
|
694 |
+
- `ignore_data_skip`: False
|
695 |
+
- `fsdp`: []
|
696 |
+
- `fsdp_min_num_params`: 0
|
697 |
+
- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
|
698 |
+
- `fsdp_transformer_layer_cls_to_wrap`: None
|
699 |
+
- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
|
700 |
+
- `deepspeed`: None
|
701 |
+
- `label_smoothing_factor`: 0.0
|
702 |
+
- `optim`: adamw_torch
|
703 |
+
- `optim_args`: None
|
704 |
+
- `adafactor`: False
|
705 |
+
- `group_by_length`: False
|
706 |
+
- `length_column_name`: length
|
707 |
+
- `ddp_find_unused_parameters`: None
|
708 |
+
- `ddp_bucket_cap_mb`: None
|
709 |
+
- `ddp_broadcast_buffers`: False
|
710 |
+
- `dataloader_pin_memory`: True
|
711 |
+
- `dataloader_persistent_workers`: False
|
712 |
+
- `skip_memory_metrics`: True
|
713 |
+
- `use_legacy_prediction_loop`: False
|
714 |
+
- `push_to_hub`: False
|
715 |
+
- `resume_from_checkpoint`: None
|
716 |
+
- `hub_model_id`: None
|
717 |
+
- `hub_strategy`: every_save
|
718 |
+
- `hub_private_repo`: False
|
719 |
+
- `hub_always_push`: False
|
720 |
+
- `gradient_checkpointing`: False
|
721 |
+
- `gradient_checkpointing_kwargs`: None
|
722 |
+
- `include_inputs_for_metrics`: False
|
723 |
+
- `eval_do_concat_batches`: True
|
724 |
+
- `fp16_backend`: auto
|
725 |
+
- `push_to_hub_model_id`: None
|
726 |
+
- `push_to_hub_organization`: None
|
727 |
+
- `mp_parameters`:
|
728 |
+
- `auto_find_batch_size`: False
|
729 |
+
- `full_determinism`: False
|
730 |
+
- `torchdynamo`: None
|
731 |
+
- `ray_scope`: last
|
732 |
+
- `ddp_timeout`: 1800
|
733 |
+
- `torch_compile`: False
|
734 |
+
- `torch_compile_backend`: None
|
735 |
+
- `torch_compile_mode`: None
|
736 |
+
- `dispatch_batches`: None
|
737 |
+
- `split_batches`: None
|
738 |
+
- `include_tokens_per_second`: False
|
739 |
+
- `include_num_input_tokens_seen`: False
|
740 |
+
- `neftune_noise_alpha`: None
|
741 |
+
- `optim_target_modules`: None
|
742 |
+
- `batch_eval_metrics`: False
|
743 |
+
- `batch_sampler`: batch_sampler
|
744 |
+
- `multi_dataset_batch_sampler`: proportional
|
745 |
+
|
746 |
+
</details>
|
747 |
+
|
748 |
+
### Training Logs
|
749 |
+
| Epoch | Step | Training Loss | Validation Loss | val_evaluator_cosine_map@100 |
|
750 |
+
|:---------:|:------:|:-------------:|:----------:|:----------------------------:|
|
751 |
+
| **0.531** | **15** | **0.4478** | **0.0912** | **0.6595** |
|
752 |
+
| 0.9912 | 28 | - | 0.0912 | 0.6595 |
|
753 |
+
|
754 |
+
* The bold row denotes the saved checkpoint.
|
755 |
+
|
756 |
+
### Framework Versions
|
757 |
+
- Python: 3.10.14
|
758 |
+
- Sentence Transformers: 3.0.1
|
759 |
+
- Transformers: 4.41.1
|
760 |
+
- PyTorch: 2.3.0+cu121
|
761 |
+
- Accelerate: 0.27.2
|
762 |
+
- Datasets: 2.19.1
|
763 |
+
- Tokenizers: 0.19.1
|
764 |
+
|
765 |
+
## Citation
|
766 |
+
|
767 |
+
### BibTeX
|
768 |
+
|
769 |
+
#### Sentence Transformers
|
770 |
+
```bibtex
|
771 |
+
@inproceedings{reimers-2019-sentence-bert,
|
772 |
+
title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
|
773 |
+
author = "Reimers, Nils and Gurevych, Iryna",
|
774 |
+
booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
|
775 |
+
month = "11",
|
776 |
+
year = "2019",
|
777 |
+
publisher = "Association for Computational Linguistics",
|
778 |
+
url = "https://arxiv.org/abs/1908.10084",
|
779 |
+
}
|
780 |
+
```
|
781 |
+
|
782 |
+
#### GISTEmbedLoss
|
783 |
+
```bibtex
|
784 |
+
@misc{solatorio2024gistembed,
|
785 |
+
title={GISTEmbed: Guided In-sample Selection of Training Negatives for Text Embedding Fine-tuning},
|
786 |
+
author={Aivin V. Solatorio},
|
787 |
+
year={2024},
|
788 |
+
eprint={2402.16829},
|
789 |
+
archivePrefix={arXiv},
|
790 |
+
primaryClass={cs.LG}
|
791 |
+
}
|
792 |
+
```
|
793 |
+
|
794 |
+
<!--
|
795 |
+
## Glossary
|
796 |
+
|
797 |
+
*Clearly define terms in order to be accessible across audiences.*
|
798 |
+
-->
|
799 |
+
|
800 |
+
<!--
|
801 |
+
## Model Card Authors
|
802 |
+
|
803 |
+
*Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
|
804 |
+
-->
|
805 |
+
|
806 |
+
<!--
|
807 |
+
## Model Card Contact
|
808 |
+
|
809 |
+
*Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
|
810 |
+
-->
|
config.json
ADDED
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "BAAI/bge-small-en-v1.5",
|
3 |
+
"architectures": [
|
4 |
+
"BertModel"
|
5 |
+
],
|
6 |
+
"attention_probs_dropout_prob": 0.1,
|
7 |
+
"classifier_dropout": null,
|
8 |
+
"hidden_act": "gelu",
|
9 |
+
"hidden_dropout_prob": 0.1,
|
10 |
+
"hidden_size": 384,
|
11 |
+
"id2label": {
|
12 |
+
"0": "LABEL_0"
|
13 |
+
},
|
14 |
+
"initializer_range": 0.02,
|
15 |
+
"intermediate_size": 1536,
|
16 |
+
"label2id": {
|
17 |
+
"LABEL_0": 0
|
18 |
+
},
|
19 |
+
"layer_norm_eps": 1e-12,
|
20 |
+
"max_position_embeddings": 512,
|
21 |
+
"model_type": "bert",
|
22 |
+
"num_attention_heads": 12,
|
23 |
+
"num_hidden_layers": 12,
|
24 |
+
"pad_token_id": 0,
|
25 |
+
"position_embedding_type": "absolute",
|
26 |
+
"torch_dtype": "float32",
|
27 |
+
"transformers_version": "4.41.1",
|
28 |
+
"type_vocab_size": 2,
|
29 |
+
"use_cache": true,
|
30 |
+
"vocab_size": 30522
|
31 |
+
}
|
config_sentence_transformers.json
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"__version__": {
|
3 |
+
"sentence_transformers": "3.0.1",
|
4 |
+
"transformers": "4.41.1",
|
5 |
+
"pytorch": "2.3.0+cu121"
|
6 |
+
},
|
7 |
+
"prompts": {},
|
8 |
+
"default_prompt_name": null,
|
9 |
+
"similarity_fn_name": null
|
10 |
+
}
|
model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6c9e5db1d3ec968d6f0160b28eb1185750c49200f0f93798f99afd2677904685
|
3 |
+
size 133462128
|
modules.json
ADDED
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[
|
2 |
+
{
|
3 |
+
"idx": 0,
|
4 |
+
"name": "0",
|
5 |
+
"path": "",
|
6 |
+
"type": "sentence_transformers.models.Transformer"
|
7 |
+
},
|
8 |
+
{
|
9 |
+
"idx": 1,
|
10 |
+
"name": "1",
|
11 |
+
"path": "1_Pooling",
|
12 |
+
"type": "sentence_transformers.models.Pooling"
|
13 |
+
},
|
14 |
+
{
|
15 |
+
"idx": 2,
|
16 |
+
"name": "2",
|
17 |
+
"path": "2_Normalize",
|
18 |
+
"type": "sentence_transformers.models.Normalize"
|
19 |
+
}
|
20 |
+
]
|
sentence_bert_config.json
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"max_seq_length": 512,
|
3 |
+
"do_lower_case": true
|
4 |
+
}
|
special_tokens_map.json
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cls_token": {
|
3 |
+
"content": "[CLS]",
|
4 |
+
"lstrip": false,
|
5 |
+
"normalized": false,
|
6 |
+
"rstrip": false,
|
7 |
+
"single_word": false
|
8 |
+
},
|
9 |
+
"mask_token": {
|
10 |
+
"content": "[MASK]",
|
11 |
+
"lstrip": false,
|
12 |
+
"normalized": false,
|
13 |
+
"rstrip": false,
|
14 |
+
"single_word": false
|
15 |
+
},
|
16 |
+
"pad_token": {
|
17 |
+
"content": "[PAD]",
|
18 |
+
"lstrip": false,
|
19 |
+
"normalized": false,
|
20 |
+
"rstrip": false,
|
21 |
+
"single_word": false
|
22 |
+
},
|
23 |
+
"sep_token": {
|
24 |
+
"content": "[SEP]",
|
25 |
+
"lstrip": false,
|
26 |
+
"normalized": false,
|
27 |
+
"rstrip": false,
|
28 |
+
"single_word": false
|
29 |
+
},
|
30 |
+
"unk_token": {
|
31 |
+
"content": "[UNK]",
|
32 |
+
"lstrip": false,
|
33 |
+
"normalized": false,
|
34 |
+
"rstrip": false,
|
35 |
+
"single_word": false
|
36 |
+
}
|
37 |
+
}
|
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
tokenizer_config.json
ADDED
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"added_tokens_decoder": {
|
3 |
+
"0": {
|
4 |
+
"content": "[PAD]",
|
5 |
+
"lstrip": false,
|
6 |
+
"normalized": false,
|
7 |
+
"rstrip": false,
|
8 |
+
"single_word": false,
|
9 |
+
"special": true
|
10 |
+
},
|
11 |
+
"100": {
|
12 |
+
"content": "[UNK]",
|
13 |
+
"lstrip": false,
|
14 |
+
"normalized": false,
|
15 |
+
"rstrip": false,
|
16 |
+
"single_word": false,
|
17 |
+
"special": true
|
18 |
+
},
|
19 |
+
"101": {
|
20 |
+
"content": "[CLS]",
|
21 |
+
"lstrip": false,
|
22 |
+
"normalized": false,
|
23 |
+
"rstrip": false,
|
24 |
+
"single_word": false,
|
25 |
+
"special": true
|
26 |
+
},
|
27 |
+
"102": {
|
28 |
+
"content": "[SEP]",
|
29 |
+
"lstrip": false,
|
30 |
+
"normalized": false,
|
31 |
+
"rstrip": false,
|
32 |
+
"single_word": false,
|
33 |
+
"special": true
|
34 |
+
},
|
35 |
+
"103": {
|
36 |
+
"content": "[MASK]",
|
37 |
+
"lstrip": false,
|
38 |
+
"normalized": false,
|
39 |
+
"rstrip": false,
|
40 |
+
"single_word": false,
|
41 |
+
"special": true
|
42 |
+
}
|
43 |
+
},
|
44 |
+
"clean_up_tokenization_spaces": true,
|
45 |
+
"cls_token": "[CLS]",
|
46 |
+
"do_basic_tokenize": true,
|
47 |
+
"do_lower_case": true,
|
48 |
+
"mask_token": "[MASK]",
|
49 |
+
"model_max_length": 512,
|
50 |
+
"never_split": null,
|
51 |
+
"pad_token": "[PAD]",
|
52 |
+
"sep_token": "[SEP]",
|
53 |
+
"strip_accents": null,
|
54 |
+
"tokenize_chinese_chars": true,
|
55 |
+
"tokenizer_class": "BertTokenizer",
|
56 |
+
"unk_token": "[UNK]"
|
57 |
+
}
|
vocab.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|