pytorch_model.bin upload/update
Browse files- 1_Pooling/config.json +10 -0
- README.md +820 -0
- config.json +26 -0
- config_sentence_transformers.json +10 -0
- model.safetensors +3 -0
- modules.json +20 -0
- sentence_bert_config.json +4 -0
- special_tokens_map.json +37 -0
- tokenizer.json +0 -0
- tokenizer_config.json +64 -0
- vocab.txt +0 -0
1_Pooling/config.json
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"word_embedding_dimension": 384,
|
3 |
+
"pooling_mode_cls_token": false,
|
4 |
+
"pooling_mode_mean_tokens": true,
|
5 |
+
"pooling_mode_max_tokens": false,
|
6 |
+
"pooling_mode_mean_sqrt_len_tokens": false,
|
7 |
+
"pooling_mode_weightedmean_tokens": false,
|
8 |
+
"pooling_mode_lasttoken": false,
|
9 |
+
"include_prompt": true
|
10 |
+
}
|
README.md
ADDED
@@ -0,0 +1,820 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
language: []
|
3 |
+
library_name: sentence-transformers
|
4 |
+
tags:
|
5 |
+
- sentence-transformers
|
6 |
+
- sentence-similarity
|
7 |
+
- feature-extraction
|
8 |
+
- generated_from_trainer
|
9 |
+
- dataset_size:900
|
10 |
+
- loss:GISTEmbedLoss
|
11 |
+
base_model: sentence-transformers/all-MiniLM-L6-v2
|
12 |
+
datasets: []
|
13 |
+
metrics:
|
14 |
+
- cosine_accuracy@1
|
15 |
+
- cosine_accuracy@5
|
16 |
+
- cosine_accuracy@10
|
17 |
+
- cosine_precision@1
|
18 |
+
- cosine_precision@5
|
19 |
+
- cosine_precision@10
|
20 |
+
- cosine_recall@1
|
21 |
+
- cosine_recall@5
|
22 |
+
- cosine_recall@10
|
23 |
+
- cosine_ndcg@5
|
24 |
+
- cosine_ndcg@10
|
25 |
+
- cosine_ndcg@100
|
26 |
+
- cosine_mrr@5
|
27 |
+
- cosine_mrr@10
|
28 |
+
- cosine_mrr@100
|
29 |
+
- cosine_map@100
|
30 |
+
- dot_accuracy@1
|
31 |
+
- dot_accuracy@5
|
32 |
+
- dot_accuracy@10
|
33 |
+
- dot_precision@1
|
34 |
+
- dot_precision@5
|
35 |
+
- dot_precision@10
|
36 |
+
- dot_recall@1
|
37 |
+
- dot_recall@5
|
38 |
+
- dot_recall@10
|
39 |
+
- dot_ndcg@5
|
40 |
+
- dot_ndcg@10
|
41 |
+
- dot_ndcg@100
|
42 |
+
- dot_mrr@5
|
43 |
+
- dot_mrr@10
|
44 |
+
- dot_mrr@100
|
45 |
+
- dot_map@100
|
46 |
+
widget:
|
47 |
+
- source_sentence: How does the committee assist in the identification of produce
|
48 |
+
clusters for FPOs?
|
49 |
+
sentences:
|
50 |
+
- '''8.1 CSCs under Ministry of Electronics and Information Technology (MeITY)
|
51 |
+
have been engaged to enrol non-loanee farmers. The Insurance Companies are
|
52 |
+
required to enter into a separate agreement with CSC and pay service charges
|
53 |
+
as fixed by DAC&FW, GOI per farmer per village per season. No other agreement
|
54 |
+
or payment is required to be made for this purpose. Nodal agency for engagement
|
55 |
+
with Ministry of Agriculture and Farmers Welfare and Insurance Companies will
|
56 |
+
be CSC-SPV, a company established under MeITY for carrying out e-governance
|
57 |
+
initiatives of GoI. 8.2 No charges/fee shall be borne or paid by the farmers
|
58 |
+
being enrolled through CSCs i.e. CSC-SPV and CSC-VLE 8.3 As per IRDA circular,
|
59 |
+
no separate qualification/certification will be required for the VLEs of CSCs
|
60 |
+
to facilitate enrolment of non-loanee farmers. 8.4 All empanelled Insurance
|
61 |
+
Companies will compulsorily be required to enter into an agreement with CSC
|
62 |
+
for enrolment of non-loanee farmers and for provision of other defined services
|
63 |
+
to farmers. 8.5 Other designated intermediaries may be linked with the Portal
|
64 |
+
in due course. 8.6 Empanelled Insurance Companies have to necessarily register
|
65 |
+
on the portal and submit list and details of agents/intermediaries engaged
|
66 |
+
for enrolment of non-loanee farmers in the beginning of each season within
|
67 |
+
10 days of award of work in the State. Further all agents/intermediaries have
|
68 |
+
to work strictly as per the provisions of the Scheme and IRDA regulations'''
|
69 |
+
- '''7.2.2 For the claims arising out of crop damage due to post-harvest losses
|
70 |
+
and localized risks, assessment of damage will be made on individual farm basis
|
71 |
+
as outlined in (Section 21, para 21.4 and 21.5 respectively). 7.2.3 SLCCCI
|
72 |
+
will, for the purpose of notification, consider factors such as availability of
|
73 |
+
past yield data based on CCEs for adequate number of years (at least 7 years for
|
74 |
+
calculation of threshold yield), cropped acreage and capacity for estimating yield
|
75 |
+
during proposed season, etc. State govt. should endeavour to cover all the major
|
76 |
+
crops grown in all the districts of the State. States should ensure that a standard
|
77 |
+
methodology of yield estimation exists for all the crops proposed to be notified 7.2.4 State
|
78 |
+
Govt./ UT should provide 10 years'' historical yield data in soft format(in Excel)
|
79 |
+
in English to Insurance Companies for calculation of threshold yield , premium
|
80 |
+
rates etc. at insurance unit area and in its absence, data at next higher unit/nearest
|
81 |
+
neighbouring unit/weighted average of contiguous units, as decided by the SLCCCI
|
82 |
+
shall be used. The level and name of notified area of insurance unit must be part
|
83 |
+
of notification and should be provided at the time of bidding itself. 7.2.5 In
|
84 |
+
case State Govts/UT proposes to notify irrigated and un-irrigated areas under
|
85 |
+
a crop separately, they shall ensure that minimum CCEs are planned and conducted
|
86 |
+
for irrigated and un-irrigated crops separately in such areas.'''
|
87 |
+
- '''(i) It will regularly monitor and review the progress of FPO development and functioning
|
88 |
+
by holding its regular meetings. (ii) It will work out a strategy for all stakeholders
|
89 |
+
including the Implementing Agencies (SFAC, NABARD and NCDC), institutions engaged
|
90 |
+
in formation and promotion of FPOs and State Government machinery engaged in agricultural
|
91 |
+
and rural development to work synergistically to achieve the objective. (iii) It
|
92 |
+
will identify the constraints in implementation of scheme and communicate to DAC&FW
|
93 |
+
and N-PMAFSC for taking the appropriate policy decision, if so required. (iv) It
|
94 |
+
will direct respective State Government departments to help in identification of
|
95 |
+
produce clusters for recommendation to N-PMAFSC and assist in mobilization of
|
96 |
+
farmers to form the FPOs. (v) It will formulate an effective extension mechanism
|
97 |
+
to be undertaken through existing State extension machinery down the line. (vi)
|
98 |
+
Most importantly, the committee will coordinate with respective State Government
|
99 |
+
departments to facilitate FPOs in getting the license/registration for inputs,
|
100 |
+
shops/spaces in the mandis and also availing the assistance for development of
|
101 |
+
various infrastructures relating to production and postproduction activities.
|
102 |
+
The committee will also ensure that FPOs be associated/involved in all the farmers''
|
103 |
+
centric schemes of the Government to the extent feasible. (vii) It will facilitate
|
104 |
+
in making available land at appropriate place for development of common facility
|
105 |
+
center and also custom hiring center. It will also strategize and prioritize
|
106 |
+
for linking of Common Facility Centre (CFC) with e-NAM or with any other e-trading
|
107 |
+
platform.'''
|
108 |
+
- source_sentence: How can the Standardized Precipitation Index (SPI) be developed?
|
109 |
+
sentences:
|
110 |
+
- '''Identification of Outliers: All these above analyses can be used to check whether
|
111 |
+
there was any reason for yield deviation as presented in the CCE data. Then a
|
112 |
+
yield proxy map may be prepared. The Yield proxy map can be derived from remote
|
113 |
+
sensing vegetation indices (single or combination of indices), crop simulation
|
114 |
+
model output, or an integration of various parameters, which are related to crop
|
115 |
+
yield, such as soil, weather (gridded), satellite based products, etc. Whatever,
|
116 |
+
yield proxies to be used, it is the responsibility of the organization to record documentary
|
117 |
+
evidence (from their or other''s published work) that the yield proxy is related
|
118 |
+
to the particular crop''s yield. Then the IU level yields need to be overlaid
|
119 |
+
on the yield proxy map. Both yield proxy and CCE yield can be divided into 4-5
|
120 |
+
categories (e.g. Very good, Good, Medium, Poor, Very poor). Wherever there is
|
121 |
+
large mismatch between yield proxy and the CCE yield (more than 2 levels), the
|
122 |
+
CCE yield for that IU can be considered, as outliers.'''
|
123 |
+
- '''i. Shareholder List and Share Capital contribution by each Member verified
|
124 |
+
and certified by a Chartered Accountant (CA) prior to submission (Format attached,
|
125 |
+
Annexure I- Enclosure-I). ii. Resolution of FPO Board/Governing Council to seek
|
126 |
+
Equity Grant for Members (Format attached, Annexure I- Enclosure-II). iii. Consent
|
127 |
+
of Shareholders, stating name of shareholder, gender, number of shares held, face
|
128 |
+
value of shares, land holding, and signature, signifying consent for Implementing
|
129 |
+
Agency to directly transfer the Equity Grant sanctioned to the FPC on their behalf,
|
130 |
+
to FPC Bank account, against the consideration of additional shares of equivalent
|
131 |
+
value to be issued to them by FPC and on exit- transfer of the shares as per rules
|
132 |
+
(Format attached, Annexure I-Enclosure-III). iv. Audited Financials of FPO for
|
133 |
+
a minimum 1 year/for all years of existence of the FPO if formed less than three
|
134 |
+
years prior to application/ for the last 3 years for FPO in existence for 3 years
|
135 |
+
or more, verified and certified by a Chartered Accountant (CA) prior to submission.
|
136 |
+
v. Photocopy of FPO Bank Account Statement for last six months authenticated by
|
137 |
+
Branch Manager. vi. Business plan and budget for next 18 months. vii. Names, photographs,
|
138 |
+
and identity proof (one from among ration card, Aadhaar card, election identification
|
139 |
+
card, and passport of Representatives/ Directors authorized by the Board for executing
|
140 |
+
and signing all documents under the Scheme. viii. Each page of Application Form and
|
141 |
+
accompanying documents should be signed by a minimum of two Board Member Authorised
|
142 |
+
Representatives of FPO;'''
|
143 |
+
- '''gridded or satellite based data, Dry-spell Occurrence, Temperature Anomaly,
|
144 |
+
Soil Moisture Analysis (either from satellite or model data) and any other available
|
145 |
+
weather parameters (related to crop condition), either from ground or satellite
|
146 |
+
data. Another rainfall based index, which can also be studied, is Standardized
|
147 |
+
Precipitation Index (SPI). SPI can be developed using IMD gridded rainfall data
|
148 |
+
or NOAA CPC rainfall data. However, while using gridded data (either from satellite
|
149 |
+
or ground stations), appropriate resolution should be used at appropriate level.
|
150 |
+
For example, 0.25 degree data should be used only at district level and not at
|
151 |
+
block/village level. High resolution weather data can also be sourced from weather
|
152 |
+
companies.'''
|
153 |
+
- source_sentence: What will Implementing Agencies do in consultation with DAC&FW
|
154 |
+
regarding FPOs?
|
155 |
+
sentences:
|
156 |
+
- '''The protocol of timelines defined above shall also be applicable to the actual
|
157 |
+
Yield data being provided by the concerned State department in batches, i.e. Crop-wise-District-wise
|
158 |
+
lots of data and timelines for each batch of data shall be counted separately/batch
|
159 |
+
wise. Unnecessary delay in finalization of yield data will attract penal interest
|
160 |
+
on due claim amount payable to the farmers as decided by the SLCC in the matter. 19.3 The
|
161 |
+
existing Technical Advisory Committee (TAC) comprising of representatives from
|
162 |
+
Directorate of Economics and Statistics, DAC&FW, MNCFC, NSSO, IASRI, will be
|
163 |
+
further augmented and constituted under the chairmanship of **Additional Secretary,
|
164 |
+
DAC&FW** and additional members depending on case to case basis will be drawn
|
165 |
+
from various organizations namely, relevant institution under Indian Council of
|
166 |
+
Agriculture Research (ICAR), Indian Agriculture Research Institute (IARI), National
|
167 |
+
Remote Sensing Centre (NRSC), Space Applications Centre (SAC), Central Statistical
|
168 |
+
Organization(CSO), Insurance Regulatory and Development Authority of India (IRDAI),
|
169 |
+
Reserve Bank of India (RBI), National Bank for Agriculture & Rural Development
|
170 |
+
(NABARD), India Meteorological Department (IMD) or any other such organisation. 19.4 The
|
171 |
+
role of TAC shall be to review the technical matters related to execution of the
|
172 |
+
Scheme. In case, the matter requires further focused deliberations, the TAC will
|
173 |
+
refer the issue to Technical Agency (as defined in the SOP for yield dispute).
|
174 |
+
Technical Agency (TA) will compulsorily take input/reports/relevant information
|
175 |
+
from concerned State Department and Insurance Company and may also opt for representatives
|
176 |
+
of other organizations/experts (if required) with permission of chairman of TAC
|
177 |
+
and follow the procedure as defined in the SOP. 19.5 State shall also constitute
|
178 |
+
the State Level Technical Advisory Committee (STAC) on similar pattern to resolve
|
179 |
+
disputes.'''
|
180 |
+
- '''Name of Implementing Agency (NABARD/NCDC):.............................................
|
181 |
+
Address: ...........................................................................................................
|
182 |
+
...........................................................................................................
|
183 |
+
................................................................................................................. Phone
|
184 |
+
Number: ............................................................................. (Each
|
185 |
+
page of the application form should be signed by Branch head and Zonal Manager) Name
|
186 |
+
and Address of the applicant Bank Branch : 1 a) Complete Postal Address (*with
|
187 |
+
pin-code) : 1 b) Phone No. with STD : 1 c) Fax No.: 1 d) E-Mail Address: 1
|
188 |
+
e) Details of the authorised Designation Mobile No. E-Mail Address. person
|
189 |
+
of the Bank submitting the Claim: 2 Name of Borrower FPO : 2 a) Constitution: Producer
|
190 |
+
Organization 2 b) Registered Office Address (*with pin-code): (i). Phone No. (ii).
|
191 |
+
Fax No. (iii). E-mail Address 2 c) Business Office Address (if any) (i). Phone
|
192 |
+
No. (ii). Fax No. (iii). E-mail Address 2 d) Name of CEO : Mobile No. 2 e)
|
193 |
+
Credit Facility for which guarantee cover sought : Old New Expansion Technical
|
194 |
+
Upgradation 2 f ) Give details of components:- Inputs: Processing: Marketing: Any
|
195 |
+
other: Total Investment: 3 Banking Facilities Sanctioned by sanctioning authority
|
196 |
+
(Rs. in Lakh):- (i). Term-Loan : Date of Sanction: Amount Outstanding: IRAC
|
197 |
+
Status: IRAC Status: (ii).Cash Credit : Date of Sanction: Amount Outstanding: 3
|
198 |
+
a) Sanctioning Office: Branch: ZO / RO: HO: 3 b) Designation of Sanctioning
|
199 |
+
Authority : 3 c) Sanctioning authority approval vide : 3 d) Sanction /
|
200 |
+
Appraisal Note No. Dated: 3 e) Agenda No. / Minutes conveying sanction : 4 Name
|
201 |
+
and Address of Controlling Office of the Branch (*with pin-code): 4.a). Name
|
202 |
+
of Controlling Authority : 4.b). Mobile No.: 4.c). Fax. No. : 4.d). E-Mail
|
203 |
+
Address. : 5 Present status of FPO Activity : (Give component wise details) 5.
|
204 |
+
a) 5. b). 5. c). 5. d). 5. e). 5. f ) 6 Status of Accounts 6. a).
|
205 |
+
Term-Loan: Amount of Disbursement till date : Outstanding as on date : i).'''
|
206 |
+
- '''(i) Implementing Agencies will closely and cohesively work with CBBOs to ensure
|
207 |
+
that CBBOs perform their activities to make FPOs economically sustainable. (ii)
|
208 |
+
Implementing Agencies will also monitor CBBOs to ensure regular data entry on
|
209 |
+
integrated portal with respect to details of respective FPOs. (iii) Implementing
|
210 |
+
Agencies can operate through their MIS portal till Integrated Portal is put in
|
211 |
+
place to ensure uniformity of database on FPO. Once national level Integrated
|
212 |
+
Portal managed through National Project Management Agency (NPMA) is put in place,
|
213 |
+
Implementing Agencies will have to ensure interoperability with Integrated Portal
|
214 |
+
to ensure smooth data transfer and operate in coordination with Integrated Portal
|
215 |
+
design and requirement. (iv) NABARD and NCDC will maintain and manage Credit
|
216 |
+
Guarantee Fund (CGF) as per the established procedure. (v) Implementing Agencies
|
217 |
+
in consultation with DAC&FW will formulate rating tools for FPOs to assess them
|
218 |
+
in terms of level of activity, economic viability and sustainability, etc. The
|
219 |
+
rating of the FPOs can be used as an instrument to promote FPOs. (vi) Implementing
|
220 |
+
Agencies will prepare Annual Action Plan and submit to DAC&FW in advance for
|
221 |
+
consideration of Project Management Advisory and Fund Sanctioning Committee (N-PMAFSC)
|
222 |
+
along with prescribed Utilization Certificate. (vii) As assigned by DAC&FW/N-PMAFSC,
|
223 |
+
Implementing Agency will coordinate with concerned Value-Chain Organization(s)
|
224 |
+
regarding stages of formation and promotion of FPOs by those organizations along
|
225 |
+
with FPO management cost & utilization of previous amount along with documentary
|
226 |
+
proof from time to time as well as requirement of Equity Grant for channelizing
|
227 |
+
their claim to N-PMAFSC for payment.'''
|
228 |
+
- source_sentence: How is the bidding process conducted?
|
229 |
+
sentences:
|
230 |
+
- '''Identification of Outliers: All these above analyses can be used to check whether
|
231 |
+
there was any reason for yield deviation as presented in the CCE data. Then a
|
232 |
+
yield proxy map may be prepared. The Yield proxy map can be derived from remote
|
233 |
+
sensing vegetation indices (single or combination of indices), crop simulation
|
234 |
+
model output, or an integration of various parameters, which are related to crop
|
235 |
+
yield, such as soil, weather (gridded), satellite based products, etc. Whatever,
|
236 |
+
yield proxies to be used, it is the responsibility of the organization to record documentary
|
237 |
+
evidence (from their or other''s published work) that the yield proxy is related
|
238 |
+
to the particular crop''s yield. Then the IU level yields need to be overlaid
|
239 |
+
on the yield proxy map. Both yield proxy and CCE yield can be divided into 4-5
|
240 |
+
categories (e.g. Very good, Good, Medium, Poor, Very poor). Wherever there is
|
241 |
+
large mismatch between yield proxy and the CCE yield (more than 2 levels), the
|
242 |
+
CCE yield for that IU can be considered, as outliers.'''
|
243 |
+
- '''However, in absence of insured area of last year/season for all proposed crops
|
244 |
+
or any crop, net sown area of that crop(s) will be considered for calculation
|
245 |
+
of weighted premium of district. This data will be used for calculation of L1
|
246 |
+
only. 7.1.5 Bidding **shall be done through e-tendering** and work order may
|
247 |
+
be released within 2 weeks of the opening of the Tender. 7.1.6 Depending on
|
248 |
+
the risk profile, historical loss cost and cost benefit analysis for the proposed
|
249 |
+
crop(s) in district(s) of any cluster, if the State Government feels that the
|
250 |
+
premium rate likely to be offered by bidding Insurance Companies would be abnormally
|
251 |
+
high, then the State Govt. can fix a ceiling on premium rates for such crop(s)
|
252 |
+
proposed to be included in the bidding evaluation for the bidding period. However,
|
253 |
+
recourse to this ceiling provision may be done only in well justified cases and
|
254 |
+
not as a general practice. The ceiling premium rate may be derived based on statistical
|
255 |
+
evaluation/actuarial premium analysis, loss cost, historical payout etc and name
|
256 |
+
of such crop should be disclosed by State Govt. compulsorily in the tender document. 7.1.7 In
|
257 |
+
such cases where a ceiling has been indicated, State government must call financial
|
258 |
+
bids in two step bidding or in two separate envelopes. First bid/envelop is for
|
259 |
+
disclosing the premium rate offered by each participating Insurance Company for
|
260 |
+
such ceiling crops and must be categorised under \''Ceiling Premium Rate\'' and 2nd bid
|
261 |
+
envelop is for bidding of crop wise premium rate for all crops included in tender.
|
262 |
+
Time interval for opening of both bid/envelop should be compulsorily mentioned
|
263 |
+
in the bidding documents and should preferably be on the same day. All participating
|
264 |
+
Insurance Companies have to submit the bid offer as per the procedure mentioned
|
265 |
+
above. 7.1.8 State Govt.'''
|
266 |
+
- ''' This consent of the beneficiary should be \''to agree that the department
|
267 |
+
responsible for implementation of the Pradhan Mantri Kisan Samman Nidhi Yojana
|
268 |
+
in Union Government or the State Government / Union Territory Administration can
|
269 |
+
use the beneficiary Aadhaar number and other information provided in the declaration
|
270 |
+
to verify the eligibility of the beneficiary for scheme as per extant scheme guidelines
|
271 |
+
with the concerned agencies\''. 6.3 The existing land-ownership system in the
|
272 |
+
concerned State / UT will be used for identification of beneficiaries. Accordingly,
|
273 |
+
it is of utmost importance that the land records are clear and updated. Further,
|
274 |
+
State / UT Governments would also expedite the progress of digitization of
|
275 |
+
the land records and linking the same with Aadhaar as well as bank details of
|
276 |
+
the beneficiaries. 6.4 The lists of eligible beneficiaries would be published
|
277 |
+
at the village level. Farmers'' families who are eligible but have been excluded
|
278 |
+
should be provided an opportunity to represent their case.'''
|
279 |
+
- source_sentence: What is the role of the committee in the development and functioning
|
280 |
+
of FPOs?
|
281 |
+
sentences:
|
282 |
+
- '''20.3.1 For addressing the issue of reliability of CCEs in terms of their
|
283 |
+
accuracy, representativeness and timeliness, innovative technologies such as
|
284 |
+
satellite remote sensing, drone, modeling, AWS/ARG, real time transmission of
|
285 |
+
data etc. should be utilized. This will ensure accurate assessment of yield and timely
|
286 |
+
payment of claims to farmers. Various studies carried out by national and international
|
287 |
+
organizations, including MNCFC, NRSC, SAC, CCAFS, IRRI, IFPRI, World Bank, etc.
|
288 |
+
have shown that the use of satellite, weather, soil and crop data, along with
|
289 |
+
images/video capture of crop growth at various stages and accurate sample CCE
|
290 |
+
data collection can improve the yield data quality/ timeliness and support timely
|
291 |
+
claim processing and payments. 20.3.2 States, with the support of national
|
292 |
+
centres as mentioned above, SRSC and SAUs, need to carry out adequate number
|
293 |
+
of pilot studies for improved yield estimation using technology, as mentioned above,
|
294 |
+
and small number of good quality CCEs. When a significant correlation is observed
|
295 |
+
between remote sensing and weather estimated yield and yield estimated through
|
296 |
+
CCEs, States and Insurance Companies can use these technologies in estimating
|
297 |
+
the crop yields at IU level, subject to the satisfaction of both States and Insurance
|
298 |
+
Companies about the accuracy of the yield estimates, to service the claims.'''
|
299 |
+
- '''| 9. | Paid up Capital (in INR) |\n|--------------------------------------------------|--------------------------------|\n| | |\n| | |\n| | |\n|
|
300 |
+
10. | Amount of Equity Grant sought |\n|
|
301 |
+
(in INR) | |\n| | |\n|
|
302 |
+
11. | Maximum shareholding of an |\n|
|
303 |
+
Individual Shareholder Member | |\n| | |\n|
|
304 |
+
12. | Bank name in which account
|
305 |
+
is |\n| maintained | |\n|
|
306 |
+
13. | Account number |\n| | |\n| | |\n|
|
307 |
+
14. | Branch name & IFSC code |\n| | |\n| | |\n|
|
308 |
+
15. | Number of Directors with their
|
309 |
+
|\n| briefs | |\n| | |\n| | |\n|
|
310 |
+
16. | Mode |\n|
|
311 |
+
(election/ nomination) | |\n| | |\n| | |\n|
|
312 |
+
17. | Number of Women Director(s) |\n| | |\n| | |\n| | |\n| | |\n|
|
313 |
+
18. | Date(s) of Board/Governing |\n|
|
314 |
+
Body Meetings held in the last | |\n|
|
315 |
+
year | |\n|
|
316 |
+
19. | Number of functional |\n|
|
317 |
+
committees of the FPO: | |\n|
|
318 |
+
(Mention the major activities of | |\n|
|
319 |
+
each committee) | |\n|
|
320 |
+
1. | |\n|
|
321 |
+
2. | |\n|
|
322 |
+
3. | |\n| | |\n|
|
323 |
+
20. | Roles & Responsibility of |\n|
|
324 |
+
Boards/ Governing Body | |\n|
|
325 |
+
1. | |\n|
|
326 |
+
2. | |\n|
|
327 |
+
3. | |\n|
|
328 |
+
4. | |\n| | |\n| | |\n| | |\n|
|
329 |
+
2. Details of Board of Directors/Governing Body- | |\n|
|
330 |
+
DIN | |\n|
|
331 |
+
Number | |\n|
|
332 |
+
Aadhar | |\n|
|
333 |
+
Numbe | |\n|
|
334 |
+
r | |\n|
|
335 |
+
Contact | |\n|
|
336 |
+
No./ | |\n|
|
337 |
+
Address | |\n|
|
338 |
+
Land | |\n|
|
339 |
+
holding | |\n|
|
340 |
+
(in Acres) | |\n|
|
341 |
+
S. | |\n|
|
342 |
+
No | |\n|
|
343 |
+
. | |\n|
|
344 |
+
Designati | |\n|
|
345 |
+
on/ Role | |\n|
|
346 |
+
in the | |\n|
|
347 |
+
FPO | |\n|
|
348 |
+
Qualificati | |\n|
|
349 |
+
ons | |\n|
|
350 |
+
Tenure (in | |\n|
|
351 |
+
yrs.) | |\n|
|
352 |
+
Name of the | |\n|
|
353 |
+
Directors of | |\n|
|
354 |
+
Board/ | |\n|
|
355 |
+
Governing | |\n|
|
356 |
+
Body | |\n|
|
357 |
+
1. | |\n|
|
358 |
+
2. | |\n|
|
359 |
+
3. | |\n|
|
360 |
+
4. | |\n| | |\n| | |\n| | |\n| | |\n| | |'''
|
361 |
+
- '''(i) It will closely monitor and review the progress of FPO development and functioning
|
362 |
+
by holding its regular meetings. (ii) It will suggest the potential produce
|
363 |
+
clusters in the district (where FPOs can be formed & promoted) to N-PMAFSC and
|
364 |
+
will also assist Implementing Agencies, CBBOs and other stakeholders in identification
|
365 |
+
of cluster(s) and activity (ies) and also in mobilization of farmers. (iii) It
|
366 |
+
will resolve the financial constraints of FPOs through District Level Bankers'' Committee
|
367 |
+
and provide feedback to N-PMAFSC. (iv) It will identify the constraints in implementation
|
368 |
+
of scheme at the ground level and communicate the same to State Level Consultative
|
369 |
+
Committee for further taking up the matter with DAC&FW and N-PMAFSC for appropriate
|
370 |
+
policy decision. (v) Any other matter so decided by the committee in the interest
|
371 |
+
of the scheme and farmers. 14.4 Close and effective monitoring has been considered
|
372 |
+
a major trigger for success of this scheme. Therefore, in addition to three tiered
|
373 |
+
afore-stated structured mechanism for monitoring of the scheme, there shall be
|
374 |
+
continuous in-house monitoring by DAC&FW and by the Implementing Agencies also.
|
375 |
+
The DAC&FW may utilize the services of Directorate of Marketing & Inspection (DMI),
|
376 |
+
which has existence through its Regional & Sub-Offices across the country. For
|
377 |
+
effective monitoring, DAC&FW may engage consultants also and cost for same will
|
378 |
+
be borne from the budget of the scheme itself.'''
|
379 |
+
pipeline_tag: sentence-similarity
|
380 |
+
model-index:
|
381 |
+
- name: SentenceTransformer based on sentence-transformers/all-MiniLM-L6-v2
|
382 |
+
results:
|
383 |
+
- task:
|
384 |
+
type: information-retrieval
|
385 |
+
name: Information Retrieval
|
386 |
+
dataset:
|
387 |
+
name: val evaluator
|
388 |
+
type: val_evaluator
|
389 |
+
metrics:
|
390 |
+
- type: cosine_accuracy@1
|
391 |
+
value: 0.46
|
392 |
+
name: Cosine Accuracy@1
|
393 |
+
- type: cosine_accuracy@5
|
394 |
+
value: 0.83
|
395 |
+
name: Cosine Accuracy@5
|
396 |
+
- type: cosine_accuracy@10
|
397 |
+
value: 0.89
|
398 |
+
name: Cosine Accuracy@10
|
399 |
+
- type: cosine_precision@1
|
400 |
+
value: 0.46
|
401 |
+
name: Cosine Precision@1
|
402 |
+
- type: cosine_precision@5
|
403 |
+
value: 0.16599999999999995
|
404 |
+
name: Cosine Precision@5
|
405 |
+
- type: cosine_precision@10
|
406 |
+
value: 0.08899999999999997
|
407 |
+
name: Cosine Precision@10
|
408 |
+
- type: cosine_recall@1
|
409 |
+
value: 0.46
|
410 |
+
name: Cosine Recall@1
|
411 |
+
- type: cosine_recall@5
|
412 |
+
value: 0.83
|
413 |
+
name: Cosine Recall@5
|
414 |
+
- type: cosine_recall@10
|
415 |
+
value: 0.89
|
416 |
+
name: Cosine Recall@10
|
417 |
+
- type: cosine_ndcg@5
|
418 |
+
value: 0.6567480575340123
|
419 |
+
name: Cosine Ndcg@5
|
420 |
+
- type: cosine_ndcg@10
|
421 |
+
value: 0.6772203266148452
|
422 |
+
name: Cosine Ndcg@10
|
423 |
+
- type: cosine_ndcg@100
|
424 |
+
value: 0.7023864779933896
|
425 |
+
name: Cosine Ndcg@100
|
426 |
+
- type: cosine_mrr@5
|
427 |
+
value: 0.5991666666666667
|
428 |
+
name: Cosine Mrr@5
|
429 |
+
- type: cosine_mrr@10
|
430 |
+
value: 0.6082619047619049
|
431 |
+
name: Cosine Mrr@10
|
432 |
+
- type: cosine_mrr@100
|
433 |
+
value: 0.6142083088480149
|
434 |
+
name: Cosine Mrr@100
|
435 |
+
- type: cosine_map@100
|
436 |
+
value: 0.6142083088480148
|
437 |
+
name: Cosine Map@100
|
438 |
+
- type: dot_accuracy@1
|
439 |
+
value: 0.46
|
440 |
+
name: Dot Accuracy@1
|
441 |
+
- type: dot_accuracy@5
|
442 |
+
value: 0.83
|
443 |
+
name: Dot Accuracy@5
|
444 |
+
- type: dot_accuracy@10
|
445 |
+
value: 0.89
|
446 |
+
name: Dot Accuracy@10
|
447 |
+
- type: dot_precision@1
|
448 |
+
value: 0.46
|
449 |
+
name: Dot Precision@1
|
450 |
+
- type: dot_precision@5
|
451 |
+
value: 0.16599999999999995
|
452 |
+
name: Dot Precision@5
|
453 |
+
- type: dot_precision@10
|
454 |
+
value: 0.08899999999999997
|
455 |
+
name: Dot Precision@10
|
456 |
+
- type: dot_recall@1
|
457 |
+
value: 0.46
|
458 |
+
name: Dot Recall@1
|
459 |
+
- type: dot_recall@5
|
460 |
+
value: 0.83
|
461 |
+
name: Dot Recall@5
|
462 |
+
- type: dot_recall@10
|
463 |
+
value: 0.89
|
464 |
+
name: Dot Recall@10
|
465 |
+
- type: dot_ndcg@5
|
466 |
+
value: 0.6580573550697268
|
467 |
+
name: Dot Ndcg@5
|
468 |
+
- type: dot_ndcg@10
|
469 |
+
value: 0.6785296241505598
|
470 |
+
name: Dot Ndcg@10
|
471 |
+
- type: dot_ndcg@100
|
472 |
+
value: 0.7036957755291041
|
473 |
+
name: Dot Ndcg@100
|
474 |
+
- type: dot_mrr@5
|
475 |
+
value: 0.6008333333333334
|
476 |
+
name: Dot Mrr@5
|
477 |
+
- type: dot_mrr@10
|
478 |
+
value: 0.6099285714285716
|
479 |
+
name: Dot Mrr@10
|
480 |
+
- type: dot_mrr@100
|
481 |
+
value: 0.6158749755146816
|
482 |
+
name: Dot Mrr@100
|
483 |
+
- type: dot_map@100
|
484 |
+
value: 0.6158749755146814
|
485 |
+
name: Dot Map@100
|
486 |
+
---
|
487 |
+
|
488 |
+
# SentenceTransformer based on sentence-transformers/all-MiniLM-L6-v2
|
489 |
+
|
490 |
+
This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [sentence-transformers/all-MiniLM-L6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2). It maps sentences & paragraphs to a 384-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
|
491 |
+
|
492 |
+
## Model Details
|
493 |
+
|
494 |
+
### Model Description
|
495 |
+
- **Model Type:** Sentence Transformer
|
496 |
+
- **Base model:** [sentence-transformers/all-MiniLM-L6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) <!-- at revision 8b3219a92973c328a8e22fadcfa821b5dc75636a -->
|
497 |
+
- **Maximum Sequence Length:** 256 tokens
|
498 |
+
- **Output Dimensionality:** 384 dimensions
|
499 |
+
- **Similarity Function:** Cosine Similarity
|
500 |
+
<!-- - **Training Dataset:** Unknown -->
|
501 |
+
<!-- - **Language:** Unknown -->
|
502 |
+
<!-- - **License:** Unknown -->
|
503 |
+
|
504 |
+
### Model Sources
|
505 |
+
|
506 |
+
- **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
|
507 |
+
- **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
|
508 |
+
- **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
|
509 |
+
|
510 |
+
### Full Model Architecture
|
511 |
+
|
512 |
+
```
|
513 |
+
SentenceTransformer(
|
514 |
+
(0): Transformer({'max_seq_length': 256, 'do_lower_case': False}) with Transformer model: BertModel
|
515 |
+
(1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
|
516 |
+
(2): Normalize()
|
517 |
+
)
|
518 |
+
```
|
519 |
+
|
520 |
+
## Usage
|
521 |
+
|
522 |
+
### Direct Usage (Sentence Transformers)
|
523 |
+
|
524 |
+
First install the Sentence Transformers library:
|
525 |
+
|
526 |
+
```bash
|
527 |
+
pip install -U sentence-transformers
|
528 |
+
```
|
529 |
+
|
530 |
+
Then you can load this model and run inference.
|
531 |
+
```python
|
532 |
+
from sentence_transformers import SentenceTransformer
|
533 |
+
|
534 |
+
# Download from the 🤗 Hub
|
535 |
+
model = SentenceTransformer("smokxy/embedding-finetuned")
|
536 |
+
# Run inference
|
537 |
+
sentences = [
|
538 |
+
'What is the role of the committee in the development and functioning of FPOs?',
|
539 |
+
"'(i) It will closely monitor and review the progress of FPO development and functioning by holding its regular meetings. (ii) It will suggest the potential produce clusters in the district (where FPOs can be formed & promoted) to N-PMAFSC and will also assist Implementing Agencies, CBBOs and other stakeholders in identification of cluster(s) and activity (ies) and also in mobilization of farmers. (iii) It will resolve the financial constraints of FPOs through District Level Bankers' Committee and provide feedback to N-PMAFSC. (iv) It will identify the constraints in implementation of scheme at the ground level and communicate the same to State Level Consultative Committee for further taking up the matter with DAC&FW and N-PMAFSC for appropriate policy decision. (v) Any other matter so decided by the committee in the interest of the scheme and farmers. 14.4 Close and effective monitoring has been considered a major trigger for success of this scheme. Therefore, in addition to three tiered afore-stated structured mechanism for monitoring of the scheme, there shall be continuous in-house monitoring by DAC&FW and by the Implementing Agencies also. The DAC&FW may utilize the services of Directorate of Marketing & Inspection (DMI), which has existence through its Regional & Sub-Offices across the country. For effective monitoring, DAC&FW may engage consultants also and cost for same will be borne from the budget of the scheme itself.'",
|
540 |
+
"'| 9. | Paid up Capital (in INR) |\\n|--------------------------------------------------|--------------------------------|\\n| | |\\n| | |\\n| | |\\n| 10. | Amount of Equity Grant sought |\\n| (in INR) | |\\n| | |\\n| 11. | Maximum shareholding of an |\\n| Individual Shareholder Member | |\\n| | |\\n| 12. | Bank name in which account is |\\n| maintained | |\\n| 13. | Account number |\\n| | |\\n| | |\\n| 14. | Branch name & IFSC code |\\n| | |\\n| | |\\n| 15. | Number of Directors with their |\\n| briefs | |\\n| | |\\n| | |\\n| 16. | Mode |\\n| (election/ nomination) | |\\n| | |\\n| | |\\n| 17. | Number of Women Director(s) |\\n| | |\\n| | |\\n| | |\\n| | |\\n| 18. | Date(s) of Board/Governing |\\n| Body Meetings held in the last | |\\n| year | |\\n| 19. | Number of functional |\\n| committees of the FPO: | |\\n| (Mention the major activities of | |\\n| each committee) | |\\n| 1. | |\\n| 2. | |\\n| 3. | |\\n| | |\\n| 20. | Roles & Responsibility of |\\n| Boards/ Governing Body | |\\n| 1. | |\\n| 2. | |\\n| 3. | |\\n| 4. | |\\n| | |\\n| | |\\n| | |\\n| 2. Details of Board of Directors/Governing Body- | |\\n| DIN | |\\n| Number | |\\n| Aadhar | |\\n| Numbe | |\\n| r | |\\n| Contact | |\\n| No./ | |\\n| Address | |\\n| Land | |\\n| holding | |\\n| (in Acres) | |\\n| S. | |\\n| No | |\\n| . | |\\n| Designati | |\\n| on/ Role | |\\n| in the | |\\n| FPO | |\\n| Qualificati | |\\n| ons | |\\n| Tenure (in | |\\n| yrs.) | |\\n| Name of the | |\\n| Directors of | |\\n| Board/ | |\\n| Governing | |\\n| Body | |\\n| 1. | |\\n| 2. | |\\n| 3. | |\\n| 4. | |\\n| | |\\n| | |\\n| | |\\n| | |\\n| | |'",
|
541 |
+
]
|
542 |
+
embeddings = model.encode(sentences)
|
543 |
+
print(embeddings.shape)
|
544 |
+
# [3, 384]
|
545 |
+
|
546 |
+
# Get the similarity scores for the embeddings
|
547 |
+
similarities = model.similarity(embeddings, embeddings)
|
548 |
+
print(similarities.shape)
|
549 |
+
# [3, 3]
|
550 |
+
```
|
551 |
+
|
552 |
+
<!--
|
553 |
+
### Direct Usage (Transformers)
|
554 |
+
|
555 |
+
<details><summary>Click to see the direct usage in Transformers</summary>
|
556 |
+
|
557 |
+
</details>
|
558 |
+
-->
|
559 |
+
|
560 |
+
<!--
|
561 |
+
### Downstream Usage (Sentence Transformers)
|
562 |
+
|
563 |
+
You can finetune this model on your own dataset.
|
564 |
+
|
565 |
+
<details><summary>Click to expand</summary>
|
566 |
+
|
567 |
+
</details>
|
568 |
+
-->
|
569 |
+
|
570 |
+
<!--
|
571 |
+
### Out-of-Scope Use
|
572 |
+
|
573 |
+
*List how the model may foreseeably be misused and address what users ought not to do with the model.*
|
574 |
+
-->
|
575 |
+
|
576 |
+
## Evaluation
|
577 |
+
|
578 |
+
### Metrics
|
579 |
+
|
580 |
+
#### Information Retrieval
|
581 |
+
* Dataset: `val_evaluator`
|
582 |
+
* Evaluated with [<code>InformationRetrievalEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator)
|
583 |
+
|
584 |
+
| Metric | Value |
|
585 |
+
|:--------------------|:-----------|
|
586 |
+
| cosine_accuracy@1 | 0.46 |
|
587 |
+
| cosine_accuracy@5 | 0.83 |
|
588 |
+
| cosine_accuracy@10 | 0.89 |
|
589 |
+
| cosine_precision@1 | 0.46 |
|
590 |
+
| cosine_precision@5 | 0.166 |
|
591 |
+
| cosine_precision@10 | 0.089 |
|
592 |
+
| cosine_recall@1 | 0.46 |
|
593 |
+
| cosine_recall@5 | 0.83 |
|
594 |
+
| cosine_recall@10 | 0.89 |
|
595 |
+
| cosine_ndcg@5 | 0.6567 |
|
596 |
+
| cosine_ndcg@10 | 0.6772 |
|
597 |
+
| cosine_ndcg@100 | 0.7024 |
|
598 |
+
| cosine_mrr@5 | 0.5992 |
|
599 |
+
| cosine_mrr@10 | 0.6083 |
|
600 |
+
| cosine_mrr@100 | 0.6142 |
|
601 |
+
| cosine_map@100 | 0.6142 |
|
602 |
+
| dot_accuracy@1 | 0.46 |
|
603 |
+
| dot_accuracy@5 | 0.83 |
|
604 |
+
| dot_accuracy@10 | 0.89 |
|
605 |
+
| dot_precision@1 | 0.46 |
|
606 |
+
| dot_precision@5 | 0.166 |
|
607 |
+
| dot_precision@10 | 0.089 |
|
608 |
+
| dot_recall@1 | 0.46 |
|
609 |
+
| dot_recall@5 | 0.83 |
|
610 |
+
| dot_recall@10 | 0.89 |
|
611 |
+
| dot_ndcg@5 | 0.6581 |
|
612 |
+
| dot_ndcg@10 | 0.6785 |
|
613 |
+
| dot_ndcg@100 | 0.7037 |
|
614 |
+
| dot_mrr@5 | 0.6008 |
|
615 |
+
| dot_mrr@10 | 0.6099 |
|
616 |
+
| dot_mrr@100 | 0.6159 |
|
617 |
+
| **dot_map@100** | **0.6159** |
|
618 |
+
|
619 |
+
<!--
|
620 |
+
## Bias, Risks and Limitations
|
621 |
+
|
622 |
+
*What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
|
623 |
+
-->
|
624 |
+
|
625 |
+
<!--
|
626 |
+
### Recommendations
|
627 |
+
|
628 |
+
*What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
|
629 |
+
-->
|
630 |
+
|
631 |
+
## Training Details
|
632 |
+
|
633 |
+
### Training Hyperparameters
|
634 |
+
#### Non-Default Hyperparameters
|
635 |
+
|
636 |
+
- `eval_strategy`: steps
|
637 |
+
- `gradient_accumulation_steps`: 4
|
638 |
+
- `learning_rate`: 1e-05
|
639 |
+
- `weight_decay`: 0.01
|
640 |
+
- `num_train_epochs`: 1.0
|
641 |
+
- `warmup_ratio`: 0.1
|
642 |
+
- `load_best_model_at_end`: True
|
643 |
+
|
644 |
+
#### All Hyperparameters
|
645 |
+
<details><summary>Click to expand</summary>
|
646 |
+
|
647 |
+
- `overwrite_output_dir`: False
|
648 |
+
- `do_predict`: False
|
649 |
+
- `eval_strategy`: steps
|
650 |
+
- `prediction_loss_only`: True
|
651 |
+
- `per_device_train_batch_size`: 8
|
652 |
+
- `per_device_eval_batch_size`: 8
|
653 |
+
- `per_gpu_train_batch_size`: None
|
654 |
+
- `per_gpu_eval_batch_size`: None
|
655 |
+
- `gradient_accumulation_steps`: 4
|
656 |
+
- `eval_accumulation_steps`: None
|
657 |
+
- `learning_rate`: 1e-05
|
658 |
+
- `weight_decay`: 0.01
|
659 |
+
- `adam_beta1`: 0.9
|
660 |
+
- `adam_beta2`: 0.999
|
661 |
+
- `adam_epsilon`: 1e-08
|
662 |
+
- `max_grad_norm`: 1.0
|
663 |
+
- `num_train_epochs`: 1.0
|
664 |
+
- `max_steps`: -1
|
665 |
+
- `lr_scheduler_type`: linear
|
666 |
+
- `lr_scheduler_kwargs`: {}
|
667 |
+
- `warmup_ratio`: 0.1
|
668 |
+
- `warmup_steps`: 0
|
669 |
+
- `log_level`: passive
|
670 |
+
- `log_level_replica`: warning
|
671 |
+
- `log_on_each_node`: True
|
672 |
+
- `logging_nan_inf_filter`: True
|
673 |
+
- `save_safetensors`: True
|
674 |
+
- `save_on_each_node`: False
|
675 |
+
- `save_only_model`: False
|
676 |
+
- `restore_callback_states_from_checkpoint`: False
|
677 |
+
- `no_cuda`: False
|
678 |
+
- `use_cpu`: False
|
679 |
+
- `use_mps_device`: False
|
680 |
+
- `seed`: 42
|
681 |
+
- `data_seed`: None
|
682 |
+
- `jit_mode_eval`: False
|
683 |
+
- `use_ipex`: False
|
684 |
+
- `bf16`: False
|
685 |
+
- `fp16`: False
|
686 |
+
- `fp16_opt_level`: O1
|
687 |
+
- `half_precision_backend`: auto
|
688 |
+
- `bf16_full_eval`: False
|
689 |
+
- `fp16_full_eval`: False
|
690 |
+
- `tf32`: None
|
691 |
+
- `local_rank`: 0
|
692 |
+
- `ddp_backend`: None
|
693 |
+
- `tpu_num_cores`: None
|
694 |
+
- `tpu_metrics_debug`: False
|
695 |
+
- `debug`: []
|
696 |
+
- `dataloader_drop_last`: False
|
697 |
+
- `dataloader_num_workers`: 0
|
698 |
+
- `dataloader_prefetch_factor`: None
|
699 |
+
- `past_index`: -1
|
700 |
+
- `disable_tqdm`: False
|
701 |
+
- `remove_unused_columns`: True
|
702 |
+
- `label_names`: None
|
703 |
+
- `load_best_model_at_end`: True
|
704 |
+
- `ignore_data_skip`: False
|
705 |
+
- `fsdp`: []
|
706 |
+
- `fsdp_min_num_params`: 0
|
707 |
+
- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
|
708 |
+
- `fsdp_transformer_layer_cls_to_wrap`: None
|
709 |
+
- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
|
710 |
+
- `deepspeed`: None
|
711 |
+
- `label_smoothing_factor`: 0.0
|
712 |
+
- `optim`: adamw_torch
|
713 |
+
- `optim_args`: None
|
714 |
+
- `adafactor`: False
|
715 |
+
- `group_by_length`: False
|
716 |
+
- `length_column_name`: length
|
717 |
+
- `ddp_find_unused_parameters`: None
|
718 |
+
- `ddp_bucket_cap_mb`: None
|
719 |
+
- `ddp_broadcast_buffers`: False
|
720 |
+
- `dataloader_pin_memory`: True
|
721 |
+
- `dataloader_persistent_workers`: False
|
722 |
+
- `skip_memory_metrics`: True
|
723 |
+
- `use_legacy_prediction_loop`: False
|
724 |
+
- `push_to_hub`: False
|
725 |
+
- `resume_from_checkpoint`: None
|
726 |
+
- `hub_model_id`: None
|
727 |
+
- `hub_strategy`: every_save
|
728 |
+
- `hub_private_repo`: False
|
729 |
+
- `hub_always_push`: False
|
730 |
+
- `gradient_checkpointing`: False
|
731 |
+
- `gradient_checkpointing_kwargs`: None
|
732 |
+
- `include_inputs_for_metrics`: False
|
733 |
+
- `eval_do_concat_batches`: True
|
734 |
+
- `fp16_backend`: auto
|
735 |
+
- `push_to_hub_model_id`: None
|
736 |
+
- `push_to_hub_organization`: None
|
737 |
+
- `mp_parameters`:
|
738 |
+
- `auto_find_batch_size`: False
|
739 |
+
- `full_determinism`: False
|
740 |
+
- `torchdynamo`: None
|
741 |
+
- `ray_scope`: last
|
742 |
+
- `ddp_timeout`: 1800
|
743 |
+
- `torch_compile`: False
|
744 |
+
- `torch_compile_backend`: None
|
745 |
+
- `torch_compile_mode`: None
|
746 |
+
- `dispatch_batches`: None
|
747 |
+
- `split_batches`: None
|
748 |
+
- `include_tokens_per_second`: False
|
749 |
+
- `include_num_input_tokens_seen`: False
|
750 |
+
- `neftune_noise_alpha`: None
|
751 |
+
- `optim_target_modules`: None
|
752 |
+
- `batch_eval_metrics`: False
|
753 |
+
- `batch_sampler`: batch_sampler
|
754 |
+
- `multi_dataset_batch_sampler`: proportional
|
755 |
+
|
756 |
+
</details>
|
757 |
+
|
758 |
+
### Training Logs
|
759 |
+
| Epoch | Step | Training Loss | Validation Loss | val_evaluator_dot_map@100 |
|
760 |
+
|:---------:|:------:|:-------------:|:----------:|:-------------------------:|
|
761 |
+
| **0.531** | **15** | **0.538** | **0.0678** | **0.6159** |
|
762 |
+
| 0.9912 | 28 | - | 0.0678 | 0.6159 |
|
763 |
+
|
764 |
+
* The bold row denotes the saved checkpoint.
|
765 |
+
|
766 |
+
### Framework Versions
|
767 |
+
- Python: 3.10.14
|
768 |
+
- Sentence Transformers: 3.0.1
|
769 |
+
- Transformers: 4.41.1
|
770 |
+
- PyTorch: 2.3.0+cu121
|
771 |
+
- Accelerate: 0.27.2
|
772 |
+
- Datasets: 2.19.1
|
773 |
+
- Tokenizers: 0.19.1
|
774 |
+
|
775 |
+
## Citation
|
776 |
+
|
777 |
+
### BibTeX
|
778 |
+
|
779 |
+
#### Sentence Transformers
|
780 |
+
```bibtex
|
781 |
+
@inproceedings{reimers-2019-sentence-bert,
|
782 |
+
title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
|
783 |
+
author = "Reimers, Nils and Gurevych, Iryna",
|
784 |
+
booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
|
785 |
+
month = "11",
|
786 |
+
year = "2019",
|
787 |
+
publisher = "Association for Computational Linguistics",
|
788 |
+
url = "https://arxiv.org/abs/1908.10084",
|
789 |
+
}
|
790 |
+
```
|
791 |
+
|
792 |
+
#### GISTEmbedLoss
|
793 |
+
```bibtex
|
794 |
+
@misc{solatorio2024gistembed,
|
795 |
+
title={GISTEmbed: Guided In-sample Selection of Training Negatives for Text Embedding Fine-tuning},
|
796 |
+
author={Aivin V. Solatorio},
|
797 |
+
year={2024},
|
798 |
+
eprint={2402.16829},
|
799 |
+
archivePrefix={arXiv},
|
800 |
+
primaryClass={cs.LG}
|
801 |
+
}
|
802 |
+
```
|
803 |
+
|
804 |
+
<!--
|
805 |
+
## Glossary
|
806 |
+
|
807 |
+
*Clearly define terms in order to be accessible across audiences.*
|
808 |
+
-->
|
809 |
+
|
810 |
+
<!--
|
811 |
+
## Model Card Authors
|
812 |
+
|
813 |
+
*Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
|
814 |
+
-->
|
815 |
+
|
816 |
+
<!--
|
817 |
+
## Model Card Contact
|
818 |
+
|
819 |
+
*Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
|
820 |
+
-->
|
config.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "sentence-transformers/all-MiniLM-L6-v2",
|
3 |
+
"architectures": [
|
4 |
+
"BertModel"
|
5 |
+
],
|
6 |
+
"attention_probs_dropout_prob": 0.1,
|
7 |
+
"classifier_dropout": null,
|
8 |
+
"gradient_checkpointing": false,
|
9 |
+
"hidden_act": "gelu",
|
10 |
+
"hidden_dropout_prob": 0.1,
|
11 |
+
"hidden_size": 384,
|
12 |
+
"initializer_range": 0.02,
|
13 |
+
"intermediate_size": 1536,
|
14 |
+
"layer_norm_eps": 1e-12,
|
15 |
+
"max_position_embeddings": 512,
|
16 |
+
"model_type": "bert",
|
17 |
+
"num_attention_heads": 12,
|
18 |
+
"num_hidden_layers": 6,
|
19 |
+
"pad_token_id": 0,
|
20 |
+
"position_embedding_type": "absolute",
|
21 |
+
"torch_dtype": "float32",
|
22 |
+
"transformers_version": "4.41.1",
|
23 |
+
"type_vocab_size": 2,
|
24 |
+
"use_cache": true,
|
25 |
+
"vocab_size": 30522
|
26 |
+
}
|
config_sentence_transformers.json
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"__version__": {
|
3 |
+
"sentence_transformers": "3.0.1",
|
4 |
+
"transformers": "4.41.1",
|
5 |
+
"pytorch": "2.3.0+cu121"
|
6 |
+
},
|
7 |
+
"prompts": {},
|
8 |
+
"default_prompt_name": null,
|
9 |
+
"similarity_fn_name": null
|
10 |
+
}
|
model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f24c6d91b3ccc57fcdfca3c1843e755250069f409eff2b155e48a2146b4b0b5f
|
3 |
+
size 90864192
|
modules.json
ADDED
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[
|
2 |
+
{
|
3 |
+
"idx": 0,
|
4 |
+
"name": "0",
|
5 |
+
"path": "",
|
6 |
+
"type": "sentence_transformers.models.Transformer"
|
7 |
+
},
|
8 |
+
{
|
9 |
+
"idx": 1,
|
10 |
+
"name": "1",
|
11 |
+
"path": "1_Pooling",
|
12 |
+
"type": "sentence_transformers.models.Pooling"
|
13 |
+
},
|
14 |
+
{
|
15 |
+
"idx": 2,
|
16 |
+
"name": "2",
|
17 |
+
"path": "2_Normalize",
|
18 |
+
"type": "sentence_transformers.models.Normalize"
|
19 |
+
}
|
20 |
+
]
|
sentence_bert_config.json
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"max_seq_length": 256,
|
3 |
+
"do_lower_case": false
|
4 |
+
}
|
special_tokens_map.json
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cls_token": {
|
3 |
+
"content": "[CLS]",
|
4 |
+
"lstrip": false,
|
5 |
+
"normalized": false,
|
6 |
+
"rstrip": false,
|
7 |
+
"single_word": false
|
8 |
+
},
|
9 |
+
"mask_token": {
|
10 |
+
"content": "[MASK]",
|
11 |
+
"lstrip": false,
|
12 |
+
"normalized": false,
|
13 |
+
"rstrip": false,
|
14 |
+
"single_word": false
|
15 |
+
},
|
16 |
+
"pad_token": {
|
17 |
+
"content": "[PAD]",
|
18 |
+
"lstrip": false,
|
19 |
+
"normalized": false,
|
20 |
+
"rstrip": false,
|
21 |
+
"single_word": false
|
22 |
+
},
|
23 |
+
"sep_token": {
|
24 |
+
"content": "[SEP]",
|
25 |
+
"lstrip": false,
|
26 |
+
"normalized": false,
|
27 |
+
"rstrip": false,
|
28 |
+
"single_word": false
|
29 |
+
},
|
30 |
+
"unk_token": {
|
31 |
+
"content": "[UNK]",
|
32 |
+
"lstrip": false,
|
33 |
+
"normalized": false,
|
34 |
+
"rstrip": false,
|
35 |
+
"single_word": false
|
36 |
+
}
|
37 |
+
}
|
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
tokenizer_config.json
ADDED
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"added_tokens_decoder": {
|
3 |
+
"0": {
|
4 |
+
"content": "[PAD]",
|
5 |
+
"lstrip": false,
|
6 |
+
"normalized": false,
|
7 |
+
"rstrip": false,
|
8 |
+
"single_word": false,
|
9 |
+
"special": true
|
10 |
+
},
|
11 |
+
"100": {
|
12 |
+
"content": "[UNK]",
|
13 |
+
"lstrip": false,
|
14 |
+
"normalized": false,
|
15 |
+
"rstrip": false,
|
16 |
+
"single_word": false,
|
17 |
+
"special": true
|
18 |
+
},
|
19 |
+
"101": {
|
20 |
+
"content": "[CLS]",
|
21 |
+
"lstrip": false,
|
22 |
+
"normalized": false,
|
23 |
+
"rstrip": false,
|
24 |
+
"single_word": false,
|
25 |
+
"special": true
|
26 |
+
},
|
27 |
+
"102": {
|
28 |
+
"content": "[SEP]",
|
29 |
+
"lstrip": false,
|
30 |
+
"normalized": false,
|
31 |
+
"rstrip": false,
|
32 |
+
"single_word": false,
|
33 |
+
"special": true
|
34 |
+
},
|
35 |
+
"103": {
|
36 |
+
"content": "[MASK]",
|
37 |
+
"lstrip": false,
|
38 |
+
"normalized": false,
|
39 |
+
"rstrip": false,
|
40 |
+
"single_word": false,
|
41 |
+
"special": true
|
42 |
+
}
|
43 |
+
},
|
44 |
+
"clean_up_tokenization_spaces": true,
|
45 |
+
"cls_token": "[CLS]",
|
46 |
+
"do_basic_tokenize": true,
|
47 |
+
"do_lower_case": true,
|
48 |
+
"mask_token": "[MASK]",
|
49 |
+
"max_length": 128,
|
50 |
+
"model_max_length": 256,
|
51 |
+
"never_split": null,
|
52 |
+
"pad_to_multiple_of": null,
|
53 |
+
"pad_token": "[PAD]",
|
54 |
+
"pad_token_type_id": 0,
|
55 |
+
"padding_side": "right",
|
56 |
+
"sep_token": "[SEP]",
|
57 |
+
"stride": 0,
|
58 |
+
"strip_accents": null,
|
59 |
+
"tokenize_chinese_chars": true,
|
60 |
+
"tokenizer_class": "BertTokenizer",
|
61 |
+
"truncation_side": "right",
|
62 |
+
"truncation_strategy": "longest_first",
|
63 |
+
"unk_token": "[UNK]"
|
64 |
+
}
|
vocab.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|