File size: 35,606 Bytes
4c3bdaf
 
 
757cf7f
 
4c3bdaf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
757cf7f
 
 
 
 
 
aa5520d
757cf7f
4c3bdaf
aa5520d
 
4c3bdaf
aa5520d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
757cf7f
aa5520d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
757cf7f
aa5520d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
757cf7f
aa5520d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
757cf7f
aa5520d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4c3bdaf
 
 
 
 
 
 
 
 
 
 
aa5520d
4c3bdaf
 
aa5520d
4c3bdaf
 
aa5520d
4c3bdaf
 
aa5520d
4c3bdaf
 
aa5520d
4c3bdaf
 
aa5520d
4c3bdaf
 
aa5520d
4c3bdaf
 
aa5520d
4c3bdaf
 
aa5520d
4c3bdaf
 
aa5520d
4c3bdaf
 
aa5520d
4c3bdaf
 
aa5520d
4c3bdaf
 
aa5520d
4c3bdaf
 
aa5520d
4c3bdaf
 
aa5520d
4c3bdaf
 
aa5520d
4c3bdaf
 
aa5520d
4c3bdaf
 
aa5520d
4c3bdaf
 
aa5520d
4c3bdaf
 
aa5520d
4c3bdaf
 
aa5520d
4c3bdaf
 
aa5520d
4c3bdaf
 
aa5520d
4c3bdaf
 
aa5520d
4c3bdaf
 
aa5520d
4c3bdaf
 
aa5520d
4c3bdaf
 
aa5520d
4c3bdaf
 
aa5520d
4c3bdaf
 
aa5520d
4c3bdaf
 
aa5520d
4c3bdaf
 
aa5520d
4c3bdaf
 
aa5520d
4c3bdaf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
aa5520d
 
 
4c3bdaf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
aa5520d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4c3bdaf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
aa5520d
4c3bdaf
 
 
 
 
 
 
 
 
 
 
 
 
aa5520d
 
4c3bdaf
 
aa5520d
4c3bdaf
757cf7f
4c3bdaf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
757cf7f
 
4c3bdaf
 
 
 
 
 
aa5520d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4c3bdaf
 
 
 
 
 
757cf7f
 
 
 
4c3bdaf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
---
base_model: BAAI/bge-small-en-v1.5
datasets: []
language: []
library_name: sentence-transformers
metrics:
- cosine_accuracy@1
- cosine_accuracy@5
- cosine_accuracy@10
- cosine_precision@1
- cosine_precision@5
- cosine_precision@10
- cosine_recall@1
- cosine_recall@5
- cosine_recall@10
- cosine_ndcg@5
- cosine_ndcg@10
- cosine_ndcg@100
- cosine_mrr@5
- cosine_mrr@10
- cosine_mrr@100
- cosine_map@100
- dot_accuracy@1
- dot_accuracy@5
- dot_accuracy@10
- dot_precision@1
- dot_precision@5
- dot_precision@10
- dot_recall@1
- dot_recall@5
- dot_recall@10
- dot_ndcg@5
- dot_ndcg@10
- dot_ndcg@100
- dot_mrr@5
- dot_mrr@10
- dot_mrr@100
- dot_map@100
pipeline_tag: sentence-similarity
tags:
- sentence-transformers
- sentence-similarity
- feature-extraction
- generated_from_trainer
- dataset_size:7033
- loss:GISTEmbedLoss
widget:
- source_sentence: How will the performance of CBBOs be assessed in the third and
    fourth year?
  sentences:
  - ''' (iv) In third and fourth year, performance of the CBBOs will be assessed  based
    on - (a) issuing Share Certificates to each member in third  year, if any; (b)
    audited Financial Statements for FPOs for second year and third year in due time
    and filing as required; (c) MoU and vendor registration as per Business Plan with
    Marketing Agencies/Institutional Buyers; (d) trading/uploading of produce in e-NAM/other
    sources, if any; (e) second tranche equity grant  to FPOs, if any; and (f) second
    tranche of credit guarantee facility, if any .    (v) In the fifth year, performance
    of the CBBOs will be assessed based on  (a) audited Statements of accounts of
    FPO and filing it; (b) 100% of agri-business plan executed and value chain developed;  (c)
    revenue model showing financial growth in last 3 consecutive  years; (d) detailed
    project completion Report; and (e) third tranche of credit guarantee facility
    if any.'''
  - '''5. Tussock caterpillar, Notolopus (=Orygyia) postica , Lymantriidae, Lepidoptera
    Symptom of damage:  Defoliation. Nature of damage:  Caterpillars of the moth feed
    on the leaves. Egg: Eggs are laid in clusters on the leaves and covered over with
    hairs. Larva:  Caterpillars are gregarious in young stages. Full grown larva possess
    a brown head, a pair of long pencil of hairs projecting forwardly from the prothorax,
    yellowish tuft of hairs arising from the lateral side of the first two abdominal
    segment and long brownish hairs arising from 8 th abdominal segment. Pupa:  Pupation
    takes place in silken cocoon. Adult:  Small adult with yellowish brown wings.
    Female moth is wingless. Presence of bipectinate antenna.'''
  - '''The Kisan Credit Card (KCC) scheme was introduced in 1998 for issue of Kisan
    Credit Cards to farmers on the basis of their holdings for uniform adoption by
    the banks so that farmers may use them to readily purchase agriculture inputs
    such as seeds, fertilizers, pesticides etc. and draw cash for their production
    needs. The scheme was further extended for the investment credit requirement of
    farmers viz. allied and non-farm activities in the year 2004. The scheme was further
    revisited in 2012 by a working Group under the Chairmanship of Shri T. M. Bhasin,
    CMD, Indian Bank with a view to simplify the scheme and facilitate issue of Electronic
    Kisan Credit Cards. The scheme provides broad guidelines to banks for operationalizing
    the KCC scheme. Implementing banks will have the discretion to adopt the same
    to suit institution/location specific requirements.'''
- source_sentence: How should State Government disclose ceiling premium rate for a
    crop in the tender document?
  sentences:
  - '''However, in absence of insured area of last year/season  for all proposed crops
    or any crop, net sown area of that crop(s) will be considered for calculation
    of weighted premium of district. This data will be used for calculation of L1
    only.  7.1.5  Bidding **shall be done through e-tendering** and work order may
    be released within 2 weeks of the  opening of the Tender.  7.1.6  Depending on
    the risk profile, historical loss cost and cost benefit analysis for the proposed
    crop(s) in district(s) of any cluster, if the State Government feels that the
    premium rate likely to be offered by bidding Insurance Companies would be abnormally
    high, then the State Govt. can fix a ceiling on  premium rates for such crop(s)
    proposed to be included in the bidding evaluation for the bidding period. However,
    recourse to this ceiling provision may be done only in well justified cases and
    not as  a general practice. The ceiling premium rate may be derived based on statistical
    evaluation/actuarial premium analysis, loss cost, historical payout etc and name
    of such crop should be disclosed by State  Govt. compulsorily in the tender document.  7.1.7  In
    such cases where a ceiling has been indicated, State government must call financial
    bids in two step  bidding or in two separate envelopes. First bid/envelop is for
    disclosing the premium rate offered by each participating Insurance Company for
    such ceiling crops and must be categorised under \''Ceiling  Premium Rate\'' and  2nd  bid
    envelop is for bidding of crop wise premium rate for all crops included in tender.
    Time interval for opening of both bid/envelop should be compulsorily mentioned
    in the bidding documents and should preferably be on the same day. All participating
    Insurance Companies have to submit the bid offer as per the procedure mentioned
    above.  7.1.8  State Govt.'''
  - '''| Chapters      | Particulars                                                |
    Page No.    |\n|---------------|------------------------------------------------------------|-------------|\n|
    1             | Concept of Producer Organisation                           | 1           |\n|
    2             | Producer Organisation Registered as Cooperative Society    | 15          |\n|
    3             | Producer Organisation Registered as Producer Company       | 19          |\n|
    4             | Producer Organisation Registered as Non-Profit Society     | 33          |\n|
    5             | Producer Organisation Registered as Trust                  | 36          |\n|
    6             | Producer Organisation Registered as Section 8 Company      | 39          |\n|
    7             | Business Planning                                          | 42          |\n|
    8             | Financial Management                                       | 55          |\n|
    9             | Funding Arrangement                                        | 60          |\n|
    10            | Monitoring by the PO, POPI and Funding Agencies            | 80          |\n|
    Attachment    |                                                            |             |\n|
    1             | Producer Company Act provisions                            |             |\n|
    2             | PRODUCE Fund Operational Guidelines                        | 106         |\n|
    3             | SFAC Circular on Promoting / supporting Producer Companies | 114         |\n|
    4             | Case Study on Bilaspur Model of PO                         | 125         |\n|
    5             | Indicative Framework of the process of forming a PO        | 131         |\n|
    6             | References                                                 | 138         |\n|
    7             | Memorandum of Agreement between NABARD and POPI            | 139         |\n|
    8             | Memorandum of Understanding between NABARD and RSA         | 143         |\n|
    9             |                                                            |             |\n|
    Abbreviations |                                                            |             |\n|               |                                                            |             |\n|
    146           |                                                            |             |\n|               |                                                            |             |\n|               |                                                            |             |\n|               |                                                            |             |\n|               |                                                            |             |\n|               |                                                            |             |\n|               |                                                            |             |\n|               |                                                            |             |\n|               |                                                            |             |\n|               |                                                            |             |\n|               |                                                            |             |\n|               |                                                            |             |\n|               |                                                            |             |'''
  - '''Agro-industries generate residues like husk, hull, shell, peel, testa, skin,
    fibre, bran, linter, stone, seed, cob, prawn, head, frog legs, low grade fish,
    leather waste, hair, bones, coir dust, saw dust, bamboo dust, etc. which could
    be recycled or used efficiently through agro-processing centres. In the last three
    decades, rice and sugarcane residues have increased by 162 and 172 %, respectively.
    Their disposal problem needs serious rethinking (Vimal, 1981). To some extent
    these organic residues are used as soil conditioner, animal feed, fuel, thatching
    and packing materials. These can also be put to new uses for manufacture of various
    chemicals and specific products (like silica, alcohol, tannins, glue, gelatine,
    wax, etc), feed, pharmaceuticals (Iycogenin, antibiotics, vitamins, etc.), fertilizers,
    energy, construction materials, paper pulp, handicraft materials etc. Residues
    from fruit and vegetable industries, fish and marine industries and slaughter
    o straw decrease their efficiency without pretreatment.'''
- source_sentence: What is the purpose of using pectolytic enzymes in fruit juice
    processing?
  sentences:
  - '''Aggregating producers into collectives is one of the best mechanism to improve
    access of small producers to investment, technology and market.  The facilitating
    agency should however keep the following factors in view:   a. Types of small
    scale producers in the target area, volume of production, socioeconomic status,
    marketing arrangement  b. Sufficient demand in the existing market to absorb  the
    additional production without  significantly affecting the prices  c. Willingness
    of producers to invest and adopt new technology, if identified, to increase  productivity
    or quality of produce  d. Challenges in the market chain and market environment
    e. Vulnerability of the market to shocks, trends and seasonality  f. Previous
    experience of collective action (of any kind) in the community g. Key commodities,
    processed products or semi-finished goods demanded by major  retailers or processing
    companies in the surrounding areas/districts  h. Support from Government Departments,
    NGOs, specialist support agencies and  private companies  for enterprise development  i.
    Incentives for members (also disincentives) for joining the PO    Keeping in view
    the sustainability of a Producer Organisation, a flow chart of activities along
    with timeline, verifiable indicators and risk factors is provided at Attachment-5.'''
  - '''2. Sampling method to be adopted – Random Size of the card including area for
    label and   other details = 20 x 30 cmm = 600 cm 2 No. of Grids = 30 Area of each
    grid = 7 x 2 cm = 14 cm 2 Total No. of eggs / cm 2 to be accommodated = 96,000
    – 1,08,000 Mean number of egg / cm 2 of the card in the grid area excluding area
    for labeling = 200 – 250 Number of counts/ card of size 20 x 30 cm to be taken
    No. of parasitised eggs = 12 • 3-4 days old parasitised egg card has to be selected
    for examination • count the number of eggs and eggs  parasitised in an area  by
    1 cm 2 • Per card of size 20 x 30 cm count randomly in 12 positions • Repeat the
    process for three different cards of same age • Express the per cent parasitisation
    . The result should fall in range of 85-90 per cent.'''
  - '''Pectins are colloidal in nature, making solutions viscous and holding other
    materials in suspension. Pectinesterase removes methyl groups from the pectin
    molecules exposing carboxyl groups which in the presence of bi- or multivalent
    cations, such as calcium, form insoluble salts which can readily be removed. At
    the same time, polygalacturonase degrades macromolecular pectin, causing reduction
    in viscosity and destroying the protective colloidal action so that suspended
    materials will settle out. Extensive use of pectolytic enzymes is made in processing
    fruit juices. Addition of pectic enzymes to grapes or other fruits during crushing
    or grinding results in increased yields of juice on pressing. Wine from grapes
    so treated will usually clear faster when fermentation is complete, and have better
    color.'''
- source_sentence: What is the purpose of the PM-Kisan Portal?
  sentences:
  - '''   2) In case of cultivable land in the State of Nagaland which is categorised
    as Jhum  land as per definition under Section–2(7) of the Nagaland Jhum Land Act,
    1970 and which is owned by the community/clan/village council/village chieftan,
    the identification of beneficiaries under PM-Kisan scheme, shall be on the basis
    of certification of land holding by the village council/chief/head of the village,
    duly verified by the administrative head of the circle/sub division and countersigned
    by the Deputy Commissioner of the District. Provided that the name of the beneficiary
    is included in the state of Nagaland''s Agriculture Census of 2015-16. This proviso
    shall not be applicable in cases of succession and family partition.  The list
    of such beneficiaries shall be subject to the exclusions under the operational
    guidelines.  5.6  For identification of *bona fide* beneficiary under PM-Kisan
    Scheme in Jharkhand, the following proposal of Government of Jharkhand was considered
    and approved by the Committee: \''The farmer will be asked to submit ''Vanshavali
    (Lineage)'' linked to the entry of land record comprising his \\ her ancestor''s
    name giving a chart of successor. This lineage chart shall be submitted before
    the Gram Sabha for calling objections. After approval of the Gram Sabha, the village
    level \\ circle level revenue officials will verify and authenticate the Vanshawali
    and possession of holding. This authenticated list of farmers after due verification
    of succession chart shall be countersigned by the District level revenue authority.
    Farmers'' names, subject to the exclusion criterion after following the aforementioned
    process, shall be uploaded on the PM-Kisan portal along with other required details
    for this disbursement of benefit under the scheme.\'''''
  - '''Deep summer ploughing should be done for field preparation for pulses,apply
    FYM and compost @ 8-10 t/ha and mix well. Sowing of Pigeon pea should be done
    by the end of June in rows at the spacing of 60-90x15-20 cm. Seed rate should
    be 12-15 kg/ha Seed should be treated with Carbendazim or Thirum @3g/kg seed Fertilizer
    dose should be scheduled as per the soil test results. In general, 20-25 kg N,
    45-50 kg P and 15-20 kg K and 20 kg S should be given basal. Improved varieties
    like Chhattisgarh Arhar -1, Chhattisgarh-2, Rajivlochan and TJT-501 should be
    sown. Soybean and other pulse crops should be sown with proper drainage arrangement.
    For this seed should be treated with culture before sowing. The quantity of Rhizobium
    culture@5g + PSB @ 10 g/kg seed should be used for this seed treatment.'''
  - '''Union Territory. The details of farmers are being maintained by the States
    / UTs either in electronic form or in manual register. To make integrated platform
    available in the country to assist in benefit transfer, a platform named **PM-Kisan
    Portal** available at URL (**http://pmkisan.gov.in**) has been be launched for
    uploading the farmers'' details at a single web-portal in a uniform structure.
    9.2  The PM-Kisan Portal has been created with the following objectives -  i)  To
    provide verified and single source of truth on farmers'' details at the portal.  ii)  Timely
    assistance to the farmers in farm operation  iii)  A unified e-platform for transferring
    of cash benefits into farmer''s bank account  through Public Financial Management
    System (PFMS) integration.  iv)  Location wise availability of benefited farmers''
    list.  v)  Ease of monitoring across the country on fund transaction details.'''
- source_sentence: What should be done before sowing pigeonpea in fields where it
    is being sown for the first time after a long time?
  sentences:
  - '''The sole arbitrator shall be appointed by NABARD in case of dispute raised
    by NABARD, from the panel of three persons nominated by RSA. Similarly, the sole
    arbitrator shall be appointed by RSA if dispute is raised by RSA from the panel
    of three persons nominated by NABARD. The language of the Arbitration shall be
    English and the arbitrator shall be fluent in English. The arbitrator should be
    person of repute and integrity and place of arbitration shall be Mumbai.\''   9.
    NABARD shall have the right to enter into similar MoU/agreements with any other  RSA/Institution.  10.
    Any notice required to be given under this MoU/Agreement shall be served on the
    party at  their respective address given below by hand delivery or by registered
    post :'''
  - '''y Firstly, Treat 1kg seeds with a mixture of 2 grams of thiram and one gram
    of carbendazim or 4 grams of Trichoderma + 1 gram of carboxyne or carbendazim.
    Before planting, treat each seed with a unique Rhizobium culture of pigeon pea.
    A packet of this culture has to be sprinkled over 10 kg of seeds, then mix it
    lightly with hands, so that a light layer is formed on the seeds. Sow this seed
    immediately. There is a possibility of the death of culture organisms from strong
    sunlight. In fields where pigeonpea is being sown for the first time after a long
    time, it must use culture.'''
  - '''Organic farming is one of the several approaches found to meet the objectives
    of sustainable agriculture. Organic farming is often associated directly with,
    \''Sustainable farming.\'' However, ‘organic farming’ and ‘sustainable farming’,
    policy and ethics-wise are t wo different terms. Many techniques used in organic
    farming like inter-cropping, mulching and integration of crops and livestock are
    not alien to various agriculture systems including the traditional agriculture
    practiced in old countries like India. However, organic farming is based on various
    laws and certification programmes, which prohibit the use of almost all synthetic
    inputs, and health of the soil is recognized as the central theme of the method.
    Organic products are grown under a system of agriculture without the use of chemical
    fertilizers and pesticides with an environmentally and socially responsible approach.
    This is a method of farming that works at'''
model-index:
- name: SentenceTransformer based on BAAI/bge-small-en-v1.5
  results:
  - task:
      type: information-retrieval
      name: Information Retrieval
    dataset:
      name: val evaluator
      type: val_evaluator
    metrics:
    - type: cosine_accuracy@1
      value: 0.4680306905370844
      name: Cosine Accuracy@1
    - type: cosine_accuracy@5
      value: 0.9092071611253197
      name: Cosine Accuracy@5
    - type: cosine_accuracy@10
      value: 0.9603580562659847
      name: Cosine Accuracy@10
    - type: cosine_precision@1
      value: 0.4680306905370844
      name: Cosine Precision@1
    - type: cosine_precision@5
      value: 0.18184143222506394
      name: Cosine Precision@5
    - type: cosine_precision@10
      value: 0.09603580562659846
      name: Cosine Precision@10
    - type: cosine_recall@1
      value: 0.4680306905370844
      name: Cosine Recall@1
    - type: cosine_recall@5
      value: 0.9092071611253197
      name: Cosine Recall@5
    - type: cosine_recall@10
      value: 0.9603580562659847
      name: Cosine Recall@10
    - type: cosine_ndcg@5
      value: 0.7079399335444153
      name: Cosine Ndcg@5
    - type: cosine_ndcg@10
      value: 0.724527850349024
      name: Cosine Ndcg@10
    - type: cosine_ndcg@100
      value: 0.732682390595948
      name: Cosine Ndcg@100
    - type: cosine_mrr@5
      value: 0.6404518329070746
      name: Cosine Mrr@5
    - type: cosine_mrr@10
      value: 0.6473191450493229
      name: Cosine Mrr@10
    - type: cosine_mrr@100
      value: 0.649235332852707
      name: Cosine Mrr@100
    - type: cosine_map@100
      value: 0.6492353328527082
      name: Cosine Map@100
    - type: dot_accuracy@1
      value: 0.46675191815856776
      name: Dot Accuracy@1
    - type: dot_accuracy@5
      value: 0.9092071611253197
      name: Dot Accuracy@5
    - type: dot_accuracy@10
      value: 0.9603580562659847
      name: Dot Accuracy@10
    - type: dot_precision@1
      value: 0.46675191815856776
      name: Dot Precision@1
    - type: dot_precision@5
      value: 0.18184143222506394
      name: Dot Precision@5
    - type: dot_precision@10
      value: 0.09603580562659846
      name: Dot Precision@10
    - type: dot_recall@1
      value: 0.46675191815856776
      name: Dot Recall@1
    - type: dot_recall@5
      value: 0.9092071611253197
      name: Dot Recall@5
    - type: dot_recall@10
      value: 0.9603580562659847
      name: Dot Recall@10
    - type: dot_ndcg@5
      value: 0.7074679767075504
      name: Dot Ndcg@5
    - type: dot_ndcg@10
      value: 0.7240558935121589
      name: Dot Ndcg@10
    - type: dot_ndcg@100
      value: 0.7322104337590828
      name: Dot Ndcg@100
    - type: dot_mrr@5
      value: 0.6398124467178163
      name: Dot Mrr@5
    - type: dot_mrr@10
      value: 0.6466797588600646
      name: Dot Mrr@10
    - type: dot_mrr@100
      value: 0.6485959466634487
      name: Dot Mrr@100
    - type: dot_map@100
      value: 0.6485959466634499
      name: Dot Map@100
---

# SentenceTransformer based on BAAI/bge-small-en-v1.5

This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [BAAI/bge-small-en-v1.5](https://huggingface.co/BAAI/bge-small-en-v1.5). It maps sentences & paragraphs to a 384-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.

## Model Details

### Model Description
- **Model Type:** Sentence Transformer
- **Base model:** [BAAI/bge-small-en-v1.5](https://huggingface.co/BAAI/bge-small-en-v1.5) <!-- at revision 5c38ec7c405ec4b44b94cc5a9bb96e735b38267a -->
- **Maximum Sequence Length:** 512 tokens
- **Output Dimensionality:** 384 dimensions
- **Similarity Function:** Cosine Similarity
<!-- - **Training Dataset:** Unknown -->
<!-- - **Language:** Unknown -->
<!-- - **License:** Unknown -->

### Model Sources

- **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
- **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
- **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)

### Full Model Architecture

```
SentenceTransformer(
  (0): Transformer({'max_seq_length': 512, 'do_lower_case': True}) with Transformer model: BertModel 
  (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': True, 'pooling_mode_mean_tokens': False, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
  (2): Normalize()
)
```

## Usage

### Direct Usage (Sentence Transformers)

First install the Sentence Transformers library:

```bash
pip install -U sentence-transformers
```

Then you can load this model and run inference.
```python
from sentence_transformers import SentenceTransformer

# Download from the 🤗 Hub
model = SentenceTransformer("SamagraDataGov/embedding_finetuned")
# Run inference
sentences = [
    'What should be done before sowing pigeonpea in fields where it is being sown for the first time after a long time?',
    "'y Firstly, Treat 1kg seeds with a mixture of 2 grams of thiram and one gram of carbendazim or 4 grams of Trichoderma + 1 gram of carboxyne or carbendazim. Before planting, treat each seed with a unique Rhizobium culture of pigeon pea. A packet of this culture has to be sprinkled over 10 kg of seeds, then mix it lightly with hands, so that a light layer is formed on the seeds. Sow this seed immediately. There is a possibility of the death of culture organisms from strong sunlight. In fields where pigeonpea is being sown for the first time after a long time, it must use culture.'",
    "'Organic farming is one of the several approaches found to meet the objectives of sustainable agriculture. Organic farming is often associated directly with, \\'Sustainable farming.\\' However, ‘organic farming’ and ‘sustainable farming’, policy and ethics-wise are t wo different terms. Many techniques used in organic farming like inter-cropping, mulching and integration of crops and livestock are not alien to various agriculture systems including the traditional agriculture practiced in old countries like India. However, organic farming is based on various laws and certification programmes, which prohibit the use of almost all synthetic inputs, and health of the soil is recognized as the central theme of the method. Organic products are grown under a system of agriculture without the use of chemical fertilizers and pesticides with an environmentally and socially responsible approach. This is a method of farming that works at'",
]
embeddings = model.encode(sentences)
print(embeddings.shape)
# [3, 384]

# Get the similarity scores for the embeddings
similarities = model.similarity(embeddings, embeddings)
print(similarities.shape)
# [3, 3]
```

<!--
### Direct Usage (Transformers)

<details><summary>Click to see the direct usage in Transformers</summary>

</details>
-->

<!--
### Downstream Usage (Sentence Transformers)

You can finetune this model on your own dataset.

<details><summary>Click to expand</summary>

</details>
-->

<!--
### Out-of-Scope Use

*List how the model may foreseeably be misused and address what users ought not to do with the model.*
-->

## Evaluation

### Metrics

#### Information Retrieval
* Dataset: `val_evaluator`
* Evaluated with [<code>InformationRetrievalEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator)

| Metric              | Value      |
|:--------------------|:-----------|
| cosine_accuracy@1   | 0.468      |
| cosine_accuracy@5   | 0.9092     |
| cosine_accuracy@10  | 0.9604     |
| cosine_precision@1  | 0.468      |
| cosine_precision@5  | 0.1818     |
| cosine_precision@10 | 0.096      |
| cosine_recall@1     | 0.468      |
| cosine_recall@5     | 0.9092     |
| cosine_recall@10    | 0.9604     |
| cosine_ndcg@5       | 0.7079     |
| cosine_ndcg@10      | 0.7245     |
| cosine_ndcg@100     | 0.7327     |
| cosine_mrr@5        | 0.6405     |
| cosine_mrr@10       | 0.6473     |
| cosine_mrr@100      | 0.6492     |
| cosine_map@100      | 0.6492     |
| dot_accuracy@1      | 0.4668     |
| dot_accuracy@5      | 0.9092     |
| dot_accuracy@10     | 0.9604     |
| dot_precision@1     | 0.4668     |
| dot_precision@5     | 0.1818     |
| dot_precision@10    | 0.096      |
| dot_recall@1        | 0.4668     |
| dot_recall@5        | 0.9092     |
| dot_recall@10       | 0.9604     |
| dot_ndcg@5          | 0.7075     |
| dot_ndcg@10         | 0.7241     |
| dot_ndcg@100        | 0.7322     |
| dot_mrr@5           | 0.6398     |
| dot_mrr@10          | 0.6467     |
| dot_mrr@100         | 0.6486     |
| **dot_map@100**     | **0.6486** |

<!--
## Bias, Risks and Limitations

*What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
-->

<!--
### Recommendations

*What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
-->

## Training Details

### Training Hyperparameters
#### Non-Default Hyperparameters

- `eval_strategy`: steps
- `gradient_accumulation_steps`: 4
- `learning_rate`: 1e-05
- `weight_decay`: 0.01
- `num_train_epochs`: 1.0
- `warmup_ratio`: 0.1
- `load_best_model_at_end`: True

#### All Hyperparameters
<details><summary>Click to expand</summary>

- `overwrite_output_dir`: False
- `do_predict`: False
- `eval_strategy`: steps
- `prediction_loss_only`: True
- `per_device_train_batch_size`: 8
- `per_device_eval_batch_size`: 8
- `per_gpu_train_batch_size`: None
- `per_gpu_eval_batch_size`: None
- `gradient_accumulation_steps`: 4
- `eval_accumulation_steps`: None
- `torch_empty_cache_steps`: None
- `learning_rate`: 1e-05
- `weight_decay`: 0.01
- `adam_beta1`: 0.9
- `adam_beta2`: 0.999
- `adam_epsilon`: 1e-08
- `max_grad_norm`: 1.0
- `num_train_epochs`: 1.0
- `max_steps`: -1
- `lr_scheduler_type`: linear
- `lr_scheduler_kwargs`: {}
- `warmup_ratio`: 0.1
- `warmup_steps`: 0
- `log_level`: passive
- `log_level_replica`: warning
- `log_on_each_node`: True
- `logging_nan_inf_filter`: True
- `save_safetensors`: True
- `save_on_each_node`: False
- `save_only_model`: False
- `restore_callback_states_from_checkpoint`: False
- `no_cuda`: False
- `use_cpu`: False
- `use_mps_device`: False
- `seed`: 42
- `data_seed`: None
- `jit_mode_eval`: False
- `use_ipex`: False
- `bf16`: False
- `fp16`: False
- `fp16_opt_level`: O1
- `half_precision_backend`: auto
- `bf16_full_eval`: False
- `fp16_full_eval`: False
- `tf32`: None
- `local_rank`: 0
- `ddp_backend`: None
- `tpu_num_cores`: None
- `tpu_metrics_debug`: False
- `debug`: []
- `dataloader_drop_last`: False
- `dataloader_num_workers`: 0
- `dataloader_prefetch_factor`: None
- `past_index`: -1
- `disable_tqdm`: False
- `remove_unused_columns`: True
- `label_names`: None
- `load_best_model_at_end`: True
- `ignore_data_skip`: False
- `fsdp`: []
- `fsdp_min_num_params`: 0
- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
- `fsdp_transformer_layer_cls_to_wrap`: None
- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
- `deepspeed`: None
- `label_smoothing_factor`: 0.0
- `optim`: adamw_torch
- `optim_args`: None
- `adafactor`: False
- `group_by_length`: False
- `length_column_name`: length
- `ddp_find_unused_parameters`: None
- `ddp_bucket_cap_mb`: None
- `ddp_broadcast_buffers`: False
- `dataloader_pin_memory`: True
- `dataloader_persistent_workers`: False
- `skip_memory_metrics`: True
- `use_legacy_prediction_loop`: False
- `push_to_hub`: False
- `resume_from_checkpoint`: None
- `hub_model_id`: None
- `hub_strategy`: every_save
- `hub_private_repo`: False
- `hub_always_push`: False
- `gradient_checkpointing`: False
- `gradient_checkpointing_kwargs`: None
- `include_inputs_for_metrics`: False
- `eval_do_concat_batches`: True
- `fp16_backend`: auto
- `push_to_hub_model_id`: None
- `push_to_hub_organization`: None
- `mp_parameters`: 
- `auto_find_batch_size`: False
- `full_determinism`: False
- `torchdynamo`: None
- `ray_scope`: last
- `ddp_timeout`: 1800
- `torch_compile`: False
- `torch_compile_backend`: None
- `torch_compile_mode`: None
- `dispatch_batches`: None
- `split_batches`: None
- `include_tokens_per_second`: False
- `include_num_input_tokens_seen`: False
- `neftune_noise_alpha`: None
- `optim_target_modules`: None
- `batch_eval_metrics`: False
- `eval_on_start`: False
- `eval_use_gather_object`: False
- `batch_sampler`: batch_sampler
- `multi_dataset_batch_sampler`: proportional

</details>

### Training Logs
| Epoch      | Step    | Training Loss | Validation Loss | val_evaluator_dot_map@100 |
|:----------:|:-------:|:-------------:|:---------------:|:-------------------------:|
| 0.0682     | 15      | 0.6463        | 0.3498          | 0.6152                    |
| 0.1364     | 30      | 0.3071        | 0.1975          | 0.6212                    |
| 0.2045     | 45      | 0.2023        | 0.1576          | 0.6248                    |
| 0.2727     | 60      | 0.1457        | 0.1357          | 0.6321                    |
| 0.3409     | 75      | 0.2456        | 0.1228          | 0.6370                    |
| 0.4091     | 90      | 0.1407        | 0.1130          | 0.6365                    |
| 0.4773     | 105     | 0.1727        | 0.1042          | 0.6393                    |
| 0.5455     | 120     | 0.1311        | 0.0975          | 0.6428                    |
| 0.6136     | 135     | 0.13          | 0.0910          | 0.6433                    |
| 0.6818     | 150     | 0.0919        | 0.0872          | 0.6466                    |
| 0.75       | 165     | 0.1587        | 0.0851          | 0.6490                    |
| 0.8182     | 180     | 0.1098        | 0.0834          | 0.6481                    |
| 0.8864     | 195     | 0.1013        | 0.0824          | 0.6461                    |
| **0.9545** | **210** | **0.1144**    | **0.0820**      | **0.6486**                |
| 1.0        | 220     | -             | 0.0820          | 0.6486                    |

* The bold row denotes the saved checkpoint.

### Framework Versions
- Python: 3.10.14
- Sentence Transformers: 3.0.1
- Transformers: 4.43.4
- PyTorch: 2.4.1+cu121
- Accelerate: 0.33.0
- Datasets: 2.21.0
- Tokenizers: 0.19.1

## Citation

### BibTeX

#### Sentence Transformers
```bibtex
@inproceedings{reimers-2019-sentence-bert,
    title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
    author = "Reimers, Nils and Gurevych, Iryna",
    booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
    month = "11",
    year = "2019",
    publisher = "Association for Computational Linguistics",
    url = "https://arxiv.org/abs/1908.10084",
}
```

#### GISTEmbedLoss
```bibtex
@misc{solatorio2024gistembed,
    title={GISTEmbed: Guided In-sample Selection of Training Negatives for Text Embedding Fine-tuning}, 
    author={Aivin V. Solatorio},
    year={2024},
    eprint={2402.16829},
    archivePrefix={arXiv},
    primaryClass={cs.LG}
}
```

<!--
## Glossary

*Clearly define terms in order to be accessible across audiences.*
-->

<!--
## Model Card Authors

*Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
-->

<!--
## Model Card Contact

*Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
-->