mrm8488's picture
Update README.md
b0fcc7a verified
|
raw
history blame
46.1 kB
metadata
language:
  - es
library_name: sentence-transformers
tags:
  - sentence-transformers
  - sentence-similarity
  - feature-extraction
  - dataset_size:1K<n<10K
  - loss:MatryoshkaLoss
  - loss:CoSENTLoss
base_model: intfloat/multilingual-e5-large
metrics:
  - pearson_cosine
  - spearman_cosine
  - pearson_manhattan
  - spearman_manhattan
  - pearson_euclidean
  - spearman_euclidean
  - pearson_dot
  - spearman_dot
  - pearson_max
  - spearman_max
widget:
  - source_sentence: El hombre captura una pelota
    sentences:
      - Un hombre lanza una pelota en el aire.
      - Un hombre está acompañando a una mujer en el camino.
      - Dos mujeres están cantando una hermosa canción.
  - source_sentence: La mujer está cortando papas.
    sentences:
      - Una mujer está cortando patatas.
      - Los patos blancos se encuentran parados en el suelo.
      - Hay una banda tocando en el escenario principal.
  - source_sentence: Un hombre está buscando algo.
    sentences:
      - En un mercado de granjeros, se encuentra un hombre.
      - Romney filmó en una reunión privada de financiadores
      - Dos perros de color negro están jugando en la hierba.
  - source_sentence: Un hombre saltando la cuerda.
    sentences:
      - Un hombre está saltando la cuerda.
      - La capital de Siria fue golpeada por dos explosiones
      - Los gatitos están comiendo de los platos.
  - source_sentence: El avión está tocando tierra.
    sentences:
      - El avión animado se encuentra en proceso de aterrizaje.
      - Un pequeño niño montado en un columpio en el parque.
      - Una persona de sexo femenino está cortando una cebolla.
pipeline_tag: sentence-similarity
model-index:
  - name: SentenceTransformer based on intfloat/multilingual-e5-large
    results:
      - task:
          type: semantic-similarity
          name: Semantic Similarity
        dataset:
          name: sts dev 768
          type: sts-dev-768
        metrics:
          - type: pearson_cosine
            value: 0.8382359637067547
            name: Pearson Cosine
          - type: spearman_cosine
            value: 0.8429605562993187
            name: Spearman Cosine
          - type: pearson_manhattan
            value: 0.8336600898033378
            name: Pearson Manhattan
          - type: spearman_manhattan
            value: 0.8448900621318144
            name: Spearman Manhattan
          - type: pearson_euclidean
            value: 0.8328580183902631
            name: Pearson Euclidean
          - type: spearman_euclidean
            value: 0.8441561677427524
            name: Spearman Euclidean
          - type: pearson_dot
            value: 0.8287262441829462
            name: Pearson Dot
          - type: spearman_dot
            value: 0.8322746204974042
            name: Spearman Dot
          - type: pearson_max
            value: 0.8382359637067547
            name: Pearson Max
          - type: spearman_max
            value: 0.8448900621318144
            name: Spearman Max
      - task:
          type: semantic-similarity
          name: Semantic Similarity
        dataset:
          name: sts dev 512
          type: sts-dev-512
        metrics:
          - type: pearson_cosine
            value: 0.8334610747047482
            name: Pearson Cosine
          - type: spearman_cosine
            value: 0.8405630189692351
            name: Spearman Cosine
          - type: pearson_manhattan
            value: 0.8316848819512679
            name: Pearson Manhattan
          - type: spearman_manhattan
            value: 0.8426142019940397
            name: Spearman Manhattan
          - type: pearson_euclidean
            value: 0.8305903222472721
            name: Pearson Euclidean
          - type: spearman_euclidean
            value: 0.8415256700272777
            name: Spearman Euclidean
          - type: pearson_dot
            value: 0.8172993617433827
            name: Pearson Dot
          - type: spearman_dot
            value: 0.823043401157181
            name: Spearman Dot
          - type: pearson_max
            value: 0.8334610747047482
            name: Pearson Max
          - type: spearman_max
            value: 0.8426142019940397
            name: Spearman Max
      - task:
          type: semantic-similarity
          name: Semantic Similarity
        dataset:
          name: sts dev 256
          type: sts-dev-256
        metrics:
          - type: pearson_cosine
            value: 0.8240056098321313
            name: Pearson Cosine
          - type: spearman_cosine
            value: 0.8355774999921849
            name: Spearman Cosine
          - type: pearson_manhattan
            value: 0.8261458415991961
            name: Pearson Manhattan
          - type: spearman_manhattan
            value: 0.8355100986320139
            name: Spearman Manhattan
          - type: pearson_euclidean
            value: 0.825647934422587
            name: Pearson Euclidean
          - type: spearman_euclidean
            value: 0.8362336344962497
            name: Spearman Euclidean
          - type: pearson_dot
            value: 0.7924886689283153
            name: Pearson Dot
          - type: spearman_dot
            value: 0.7992788592975302
            name: Spearman Dot
          - type: pearson_max
            value: 0.8261458415991961
            name: Pearson Max
          - type: spearman_max
            value: 0.8362336344962497
            name: Spearman Max
      - task:
          type: semantic-similarity
          name: Semantic Similarity
        dataset:
          name: sts dev 128
          type: sts-dev-128
        metrics:
          - type: pearson_cosine
            value: 0.8098656853945027
            name: Pearson Cosine
          - type: spearman_cosine
            value: 0.8304511476467773
            name: Spearman Cosine
          - type: pearson_manhattan
            value: 0.8208946291392102
            name: Pearson Manhattan
          - type: spearman_manhattan
            value: 0.8308359029901535
            name: Spearman Manhattan
          - type: pearson_euclidean
            value: 0.8195023110971954
            name: Pearson Euclidean
          - type: spearman_euclidean
            value: 0.8302481276550623
            name: Spearman Euclidean
          - type: pearson_dot
            value: 0.7412744037070784
            name: Pearson Dot
          - type: spearman_dot
            value: 0.7489986968697009
            name: Spearman Dot
          - type: pearson_max
            value: 0.8208946291392102
            name: Pearson Max
          - type: spearman_max
            value: 0.8308359029901535
            name: Spearman Max
      - task:
          type: semantic-similarity
          name: Semantic Similarity
        dataset:
          name: sts dev 64
          type: sts-dev-64
        metrics:
          - type: pearson_cosine
            value: 0.7777717898212414
            name: Pearson Cosine
          - type: spearman_cosine
            value: 0.8152005256760807
            name: Spearman Cosine
          - type: pearson_manhattan
            value: 0.8007095698339157
            name: Pearson Manhattan
          - type: spearman_manhattan
            value: 0.8116493253806699
            name: Spearman Manhattan
          - type: pearson_euclidean
            value: 0.8000905317852872
            name: Pearson Euclidean
          - type: spearman_euclidean
            value: 0.8110794468804238
            name: Spearman Euclidean
          - type: pearson_dot
            value: 0.6540905690432955
            name: Pearson Dot
          - type: spearman_dot
            value: 0.6589924104221199
            name: Spearman Dot
          - type: pearson_max
            value: 0.8007095698339157
            name: Pearson Max
          - type: spearman_max
            value: 0.8152005256760807
            name: Spearman Max
      - task:
          type: semantic-similarity
          name: Semantic Similarity
        dataset:
          name: sts dev 32
          type: sts-dev-32
        metrics:
          - type: pearson_cosine
            value: 0.7276908730898617
            name: Pearson Cosine
          - type: spearman_cosine
            value: 0.7805691037554072
            name: Spearman Cosine
          - type: pearson_manhattan
            value: 0.7659952363354546
            name: Pearson Manhattan
          - type: spearman_manhattan
            value: 0.7751944660837697
            name: Spearman Manhattan
          - type: pearson_euclidean
            value: 0.7674462214503804
            name: Pearson Euclidean
          - type: spearman_euclidean
            value: 0.7773298298599879
            name: Spearman Euclidean
          - type: pearson_dot
            value: 0.5395044219284906
            name: Pearson Dot
          - type: spearman_dot
            value: 0.5341543426421572
            name: Spearman Dot
          - type: pearson_max
            value: 0.7674462214503804
            name: Pearson Max
          - type: spearman_max
            value: 0.7805691037554072
            name: Spearman Max
      - task:
          type: semantic-similarity
          name: Semantic Similarity
        dataset:
          name: sts dev 16
          type: sts-dev-16
        metrics:
          - type: pearson_cosine
            value: 0.6737235484120327
            name: Pearson Cosine
          - type: spearman_cosine
            value: 0.7425360948217027
            name: Spearman Cosine
          - type: pearson_manhattan
            value: 0.7187007732867645
            name: Pearson Manhattan
          - type: spearman_manhattan
            value: 0.7279621825071231
            name: Spearman Manhattan
          - type: pearson_euclidean
            value: 0.7234911258158329
            name: Pearson Euclidean
          - type: spearman_euclidean
            value: 0.7374355146279606
            name: Spearman Euclidean
          - type: pearson_dot
            value: 0.44701957007430754
            name: Pearson Dot
          - type: spearman_dot
            value: 0.44243975098384164
            name: Spearman Dot
          - type: pearson_max
            value: 0.7234911258158329
            name: Pearson Max
          - type: spearman_max
            value: 0.7425360948217027
            name: Spearman Max
      - task:
          type: semantic-similarity
          name: Semantic Similarity
        dataset:
          name: sts test 768
          type: sts-test-768
        metrics:
          - type: pearson_cosine
            value: 0.8637130740455785
            name: Pearson Cosine
          - type: spearman_cosine
            value: 0.8774757245850818
            name: Spearman Cosine
          - type: pearson_manhattan
            value: 0.8739327947840198
            name: Pearson Manhattan
          - type: spearman_manhattan
            value: 0.8771247494149252
            name: Spearman Manhattan
          - type: pearson_euclidean
            value: 0.8742964420051067
            name: Pearson Euclidean
          - type: spearman_euclidean
            value: 0.8774039769000851
            name: Spearman Euclidean
          - type: pearson_dot
            value: 0.8587248460103846
            name: Pearson Dot
          - type: spearman_dot
            value: 0.8692624735733635
            name: Spearman Dot
          - type: pearson_max
            value: 0.8742964420051067
            name: Pearson Max
          - type: spearman_max
            value: 0.8774757245850818
            name: Spearman Max
      - task:
          type: semantic-similarity
          name: Semantic Similarity
        dataset:
          name: sts test 512
          type: sts-test-512
        metrics:
          - type: pearson_cosine
            value: 0.8608902316971913
            name: Pearson Cosine
          - type: spearman_cosine
            value: 0.8761454408181157
            name: Spearman Cosine
          - type: pearson_manhattan
            value: 0.8723366100239835
            name: Pearson Manhattan
          - type: spearman_manhattan
            value: 0.8755119028724399
            name: Spearman Manhattan
          - type: pearson_euclidean
            value: 0.8727143818945785
            name: Pearson Euclidean
          - type: spearman_euclidean
            value: 0.8758699632438892
            name: Spearman Euclidean
          - type: pearson_dot
            value: 0.8498181878456328
            name: Pearson Dot
          - type: spearman_dot
            value: 0.8568165420931783
            name: Spearman Dot
          - type: pearson_max
            value: 0.8727143818945785
            name: Pearson Max
          - type: spearman_max
            value: 0.8761454408181157
            name: Spearman Max
      - task:
          type: semantic-similarity
          name: Semantic Similarity
        dataset:
          name: sts test 256
          type: sts-test-256
        metrics:
          - type: pearson_cosine
            value: 0.8546354043013908
            name: Pearson Cosine
          - type: spearman_cosine
            value: 0.871536658256446
            name: Spearman Cosine
          - type: pearson_manhattan
            value: 0.8697716394077537
            name: Pearson Manhattan
          - type: spearman_manhattan
            value: 0.8737030599161743
            name: Spearman Manhattan
          - type: pearson_euclidean
            value: 0.86989853825415
            name: Pearson Euclidean
          - type: spearman_euclidean
            value: 0.8736845554686979
            name: Spearman Euclidean
          - type: pearson_dot
            value: 0.8131428680674924
            name: Pearson Dot
          - type: spearman_dot
            value: 0.8076436370339797
            name: Spearman Dot
          - type: pearson_max
            value: 0.86989853825415
            name: Pearson Max
          - type: spearman_max
            value: 0.8737030599161743
            name: Spearman Max
      - task:
          type: semantic-similarity
          name: Semantic Similarity
        dataset:
          name: sts test 128
          type: sts-test-128
        metrics:
          - type: pearson_cosine
            value: 0.8387977115140051
            name: Pearson Cosine
          - type: spearman_cosine
            value: 0.8645489592292456
            name: Spearman Cosine
          - type: pearson_manhattan
            value: 0.8611375341227384
            name: Pearson Manhattan
          - type: spearman_manhattan
            value: 0.8667215229295422
            name: Spearman Manhattan
          - type: pearson_euclidean
            value: 0.862154474303328
            name: Pearson Euclidean
          - type: spearman_euclidean
            value: 0.8680162798983022
            name: Spearman Euclidean
          - type: pearson_dot
            value: 0.7492475609746636
            name: Pearson Dot
          - type: spearman_dot
            value: 0.7363955675375832
            name: Spearman Dot
          - type: pearson_max
            value: 0.862154474303328
            name: Pearson Max
          - type: spearman_max
            value: 0.8680162798983022
            name: Spearman Max
      - task:
          type: semantic-similarity
          name: Semantic Similarity
        dataset:
          name: sts test 64
          type: sts-test-64
        metrics:
          - type: pearson_cosine
            value: 0.8168102869303625
            name: Pearson Cosine
          - type: spearman_cosine
            value: 0.8585329796388539
            name: Spearman Cosine
          - type: pearson_manhattan
            value: 0.8518107264951738
            name: Pearson Manhattan
          - type: spearman_manhattan
            value: 0.8606717941407515
            name: Spearman Manhattan
          - type: pearson_euclidean
            value: 0.8533959511853835
            name: Pearson Euclidean
          - type: spearman_euclidean
            value: 0.8623753165991692
            name: Spearman Euclidean
          - type: pearson_dot
            value: 0.6646337116783656
            name: Pearson Dot
          - type: spearman_dot
            value: 0.6473141838302237
            name: Spearman Dot
          - type: pearson_max
            value: 0.8533959511853835
            name: Pearson Max
          - type: spearman_max
            value: 0.8623753165991692
            name: Spearman Max
      - task:
          type: semantic-similarity
          name: Semantic Similarity
        dataset:
          name: sts test 32
          type: sts-test-32
        metrics:
          - type: pearson_cosine
            value: 0.7813945227753345
            name: Pearson Cosine
          - type: spearman_cosine
            value: 0.8424823964509079
            name: Spearman Cosine
          - type: pearson_manhattan
            value: 0.8315336527432531
            name: Pearson Manhattan
          - type: spearman_manhattan
            value: 0.8431756901550471
            name: Spearman Manhattan
          - type: pearson_euclidean
            value: 0.8345328653107531
            name: Pearson Euclidean
          - type: spearman_euclidean
            value: 0.8466076672836096
            name: Spearman Euclidean
          - type: pearson_dot
            value: 0.5520860449837447
            name: Pearson Dot
          - type: spearman_dot
            value: 0.5319238671245338
            name: Spearman Dot
          - type: pearson_max
            value: 0.8345328653107531
            name: Pearson Max
          - type: spearman_max
            value: 0.8466076672836096
            name: Spearman Max
      - task:
          type: semantic-similarity
          name: Semantic Similarity
        dataset:
          name: sts test 16
          type: sts-test-16
        metrics:
          - type: pearson_cosine
            value: 0.7198004009567176
            name: Pearson Cosine
          - type: spearman_cosine
            value: 0.8072120165730962
            name: Spearman Cosine
          - type: pearson_manhattan
            value: 0.7805727606105963
            name: Pearson Manhattan
          - type: spearman_manhattan
            value: 0.7997833060148871
            name: Spearman Manhattan
          - type: pearson_euclidean
            value: 0.7879106231813758
            name: Pearson Euclidean
          - type: spearman_euclidean
            value: 0.8090073332632988
            name: Spearman Euclidean
          - type: pearson_dot
            value: 0.44957276876149327
            name: Pearson Dot
          - type: spearman_dot
            value: 0.4411623904572447
            name: Spearman Dot
          - type: pearson_max
            value: 0.7879106231813758
            name: Pearson Max
          - type: spearman_max
            value: 0.8090073332632988
            name: Spearman Max

SentenceTransformer based on intfloat/multilingual-e5-large

This is a sentence-transformers model fine-tuned from intfloat/multilingual-e5-large on an augmented version of the stsb_multi_es dataset. It maps sentences & paragraphs to a 1024-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.

Model Details

Model Description

  • Model Type: Sentence Transformer
  • Base model: intfloat/multilingual-e5-large
  • Maximum Sequence Length: 512 tokens
  • Output Dimensionality: 1024 dimensions
  • Similarity Function: Cosine Similarity
  • Training Dataset:
    • stsb_multi_es_aug

Model Sources

Full Model Architecture

SentenceTransformer(
  (0): Transformer({'max_seq_length': 512, 'do_lower_case': False}) with Transformer model: XLMRobertaModel 
  (1): Pooling({'word_embedding_dimension': 1024, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
  (2): Normalize()
)

Usage

Direct Usage (Sentence Transformers)

First install the Sentence Transformers library:

pip install -U sentence-transformers

Then you can load this model and run inference.

from sentence_transformers import SentenceTransformer

# Download from the 🤗 Hub
model = SentenceTransformer("mrm8488/multilingual-e5-large-ft-sts-spanish-matryoshka-768-16-5e")
# Run inference
sentences = [
    'El avión está tocando tierra.',
    'El avión animado se encuentra en proceso de aterrizaje.',
    'Un pequeño niño montado en un columpio en el parque.',
]
embeddings = model.encode(sentences)
print(embeddings.shape)
# [3, 1024]

# Get the similarity scores for the embeddings
similarities = model.similarity(embeddings, embeddings)
print(similarities.shape)
# [3, 3]

Evaluation

Metrics

Semantic Similarity (dataset: sts-dev-768)

Metric Value
pearson_cosine 0.8382
spearman_cosine 0.843
pearson_manhattan 0.8337
spearman_manhattan 0.8449
pearson_euclidean 0.8329
spearman_euclidean 0.8442
pearson_dot 0.8287
spearman_dot 0.8323
pearson_max 0.8382
spearman_max 0.8449

Semantic Similarity (dataset: sts-dev-512)

Metric Value
pearson_cosine 0.8335
spearman_cosine 0.8406
pearson_manhattan 0.8317
spearman_manhattan 0.8426
pearson_euclidean 0.8306
spearman_euclidean 0.8415
pearson_dot 0.8173
spearman_dot 0.823
pearson_max 0.8335
spearman_max 0.8426

Semantic Similarity (dataset: sts-dev-256)

Metric Value
pearson_cosine 0.824
spearman_cosine 0.8356
pearson_manhattan 0.8261
spearman_manhattan 0.8355
pearson_euclidean 0.8256
spearman_euclidean 0.8362
pearson_dot 0.7925
spearman_dot 0.7993
pearson_max 0.8261
spearman_max 0.8362

Semantic Similarity (dataset: sts-dev-128)

Metric Value
pearson_cosine 0.8099
spearman_cosine 0.8305
pearson_manhattan 0.8209
spearman_manhattan 0.8308
pearson_euclidean 0.8195
spearman_euclidean 0.8302
pearson_dot 0.7413
spearman_dot 0.749
pearson_max 0.8209
spearman_max 0.8308

Semantic Similarity (dataset: sts-dev-64)

Metric Value
pearson_cosine 0.7778
spearman_cosine 0.8152
pearson_manhattan 0.8007
spearman_manhattan 0.8116
pearson_euclidean 0.8001
spearman_euclidean 0.8111
pearson_dot 0.6541
spearman_dot 0.659
pearson_max 0.8007
spearman_max 0.8152

Semantic Similarity (dataset: sts-dev-32)

Metric Value
pearson_cosine 0.7277
spearman_cosine 0.7806
pearson_manhattan 0.766
spearman_manhattan 0.7752
pearson_euclidean 0.7674
spearman_euclidean 0.7773
pearson_dot 0.5395
spearman_dot 0.5342
pearson_max 0.7674
spearman_max 0.7806

Semantic Similarity (dataset: sts-dev-16)

Metric Value
pearson_cosine 0.6737
spearman_cosine 0.7425
pearson_manhattan 0.7187
spearman_manhattan 0.728
pearson_euclidean 0.7235
spearman_euclidean 0.7374
pearson_dot 0.447
spearman_dot 0.4424
pearson_max 0.7235
spearman_max 0.7425

Semantic Similarity (dataset: sts-test-768)

Metric Value
pearson_cosine 0.8637
spearman_cosine 0.8775
pearson_manhattan 0.8739
spearman_manhattan 0.8771
pearson_euclidean 0.8743
spearman_euclidean 0.8774
pearson_dot 0.8587
spearman_dot 0.8693
pearson_max 0.8743
spearman_max 0.8775

Semantic Similarity (dataset: sts-test-512)

Metric Value
pearson_cosine 0.8609
spearman_cosine 0.8761
pearson_manhattan 0.8723
spearman_manhattan 0.8755
pearson_euclidean 0.8727
spearman_euclidean 0.8759
pearson_dot 0.8498
spearman_dot 0.8568
pearson_max 0.8727
spearman_max 0.8761

Semantic Similarity (dataset: sts-test-256)

Metric Value
pearson_cosine 0.8546
spearman_cosine 0.8715
pearson_manhattan 0.8698
spearman_manhattan 0.8737
pearson_euclidean 0.8699
spearman_euclidean 0.8737
pearson_dot 0.8131
spearman_dot 0.8076
pearson_max 0.8699
spearman_max 0.8737

Semantic Similarity (dataset: sts-test-128)

Metric Value
pearson_cosine 0.8388
spearman_cosine 0.8645
pearson_manhattan 0.8611
spearman_manhattan 0.8667
pearson_euclidean 0.8622
spearman_euclidean 0.868
pearson_dot 0.7492
spearman_dot 0.7364
pearson_max 0.8622
spearman_max 0.868

Semantic Similarity (dataset: sts-test-64)

Metric Value
pearson_cosine 0.8168
spearman_cosine 0.8585
pearson_manhattan 0.8518
spearman_manhattan 0.8607
pearson_euclidean 0.8534
spearman_euclidean 0.8624
pearson_dot 0.6646
spearman_dot 0.6473
pearson_max 0.8534
spearman_max 0.8624

Semantic Similarity (dataset: sts-test-32)

Metric Value
pearson_cosine 0.7814
spearman_cosine 0.8425
pearson_manhattan 0.8315
spearman_manhattan 0.8432
pearson_euclidean 0.8345
spearman_euclidean 0.8466
pearson_dot 0.5521
spearman_dot 0.5319
pearson_max 0.8345
spearman_max 0.8466

Semantic Similarity (dataset: sts-test-16)

Metric Value
pearson_cosine 0.7198
spearman_cosine 0.8072
pearson_manhattan 0.7806
spearman_manhattan 0.7998
pearson_euclidean 0.7879
spearman_euclidean 0.809
pearson_dot 0.4496
spearman_dot 0.4412
pearson_max 0.7879
spearman_max 0.809

Training Details

Training Dataset

stsb_multi_es_aug

  • Dataset: stsb_multi_es_aug
  • Size: 2,697 training samples
  • Columns: sentence1, sentence2, and score
  • Approximate statistics based on the first 1000 samples:
    |         | sentence1 | sentence2 | score |
    |---------|-----------|-----------|-------|
    | type    | string    | string    | float |
    | details | min: 8 tokens; mean: 22.25 tokens; max: 68 tokens | min: 8 tokens; mean: 22.01 tokens; max: 79 tokens | min: 0.0; mean: 2.67; max: 5.0 |
  • Samples:
    | sentence1 | sentence2 | score |
    |-----------|-----------|-------|
    | El pájaro de tamaño reducido se posó con delicadeza en una rama cubierta de escarcha. | Un ave de color amarillo descansaba tranquilamente en una rama. | 3.200000047683716 |
    | Una chica está tocando la flauta en un parque. | Un grupo de músicos está tocando en un escenario al aire libre. | 1.286 |
    | La aclamada escritora británica, Doris Lessing, galardonada con el premio Nobel, fallece | La destacada autora británica, Doris Lessing, reconocida con el prestigioso Premio Nobel, muere | 4.199999809265137 |
  • Loss: MatryoshkaLoss with these parameters:
    {
        "loss": "CoSENTLoss",
        "matryoshka_dims": [
            768,
            512,
            256,
            128,
            64,
            32,
            16
        ],
        "matryoshka_weights": [
            1,
            1,
            1,
            1,
            1,
            1,
            1
        ],
        "n_dims_per_step": -1
    }
    

Evaluation Dataset

stsb_multi_es_aug

  • Dataset: stsb_multi_es_aug
  • Size: 697 evaluation samples
  • Columns: sentence1, sentence2, and score
  • Approximate statistics based on the first 1000 samples:
    |         | sentence1 | sentence2 | score |
    |---------|-----------|-----------|-------|
    | type    | string    | string    | float |
    | details | min: 8 tokens; mean: 22.76 tokens; max: 67 tokens | min: 7 tokens; mean: 22.26 tokens; max: 63 tokens | min: 0.0; mean: 2.3; max: 5.0 |
  • Samples:
    | sentence1 | sentence2 | score |
    |-----------|-----------|-------|
    | Un incendio ocurrido en un hospital psiquiátrico ruso resultó en la trágica muerte de 38 personas. | Se teme que el incendio en un hospital psiquiátrico ruso cause la pérdida de la vida de 38 individuos. | 4.199999809265137 |
    | "Street dijo que el otro individuo a veces se siente avergonzado de su fiesta, lo cual provoca risas en la multitud" | "A veces, el otro tipo se encuentra avergonzado de su fiesta y no se le puede culpar." | 3.5 |
    | El veterano diplomático de Malasia tuvo un encuentro con Suu Kyi el miércoles en la casa del lago en Yangon donde permanece bajo arresto domiciliario. | Razali Ismail tuvo una reunión de 90 minutos con Suu Kyi, quien ganó el Premio Nobel de la Paz en 1991, en su casa del lago donde está recluida. | 3.691999912261963 |
  • Loss: MatryoshkaLoss with these parameters:
    {
        "loss": "CoSENTLoss",
        "matryoshka_dims": [
            768,
            512,
            256,
            128,
            64,
            32,
            16
        ],
        "matryoshka_weights": [
            1,
            1,
            1,
            1,
            1,
            1,
            1
        ],
        "n_dims_per_step": -1
    }
    

Training Hyperparameters

Non-Default Hyperparameters

  • eval_strategy: steps
  • per_device_train_batch_size: 16
  • per_device_eval_batch_size: 16
  • num_train_epochs: 5
  • warmup_ratio: 0.1
  • fp16: True

All Hyperparameters

Click to expand
  • overwrite_output_dir: False
  • do_predict: False
  • eval_strategy: steps
  • prediction_loss_only: True
  • per_device_train_batch_size: 16
  • per_device_eval_batch_size: 16
  • per_gpu_train_batch_size: None
  • per_gpu_eval_batch_size: None
  • gradient_accumulation_steps: 1
  • eval_accumulation_steps: None
  • learning_rate: 5e-05
  • weight_decay: 0.0
  • adam_beta1: 0.9
  • adam_beta2: 0.999
  • adam_epsilon: 1e-08
  • max_grad_norm: 1.0
  • num_train_epochs: 5
  • max_steps: -1
  • lr_scheduler_type: linear
  • lr_scheduler_kwargs: {}
  • warmup_ratio: 0.1
  • warmup_steps: 0
  • log_level: passive
  • log_level_replica: warning
  • log_on_each_node: True
  • logging_nan_inf_filter: True
  • save_safetensors: True
  • save_on_each_node: False
  • save_only_model: False
  • restore_callback_states_from_checkpoint: False
  • no_cuda: False
  • use_cpu: False
  • use_mps_device: False
  • seed: 42
  • data_seed: None
  • jit_mode_eval: False
  • use_ipex: False
  • bf16: False
  • fp16: True
  • fp16_opt_level: O1
  • half_precision_backend: auto
  • bf16_full_eval: False
  • fp16_full_eval: False
  • tf32: None
  • local_rank: 0
  • ddp_backend: None
  • tpu_num_cores: None
  • tpu_metrics_debug: False
  • debug: []
  • dataloader_drop_last: False
  • dataloader_num_workers: 0
  • dataloader_prefetch_factor: None
  • past_index: -1
  • disable_tqdm: False
  • remove_unused_columns: True
  • label_names: None
  • load_best_model_at_end: False
  • ignore_data_skip: False
  • fsdp: []
  • fsdp_min_num_params: 0
  • fsdp_config: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
  • fsdp_transformer_layer_cls_to_wrap: None
  • accelerator_config: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
  • deepspeed: None
  • label_smoothing_factor: 0.0
  • optim: adamw_torch
  • optim_args: None
  • adafactor: False
  • group_by_length: False
  • length_column_name: length
  • ddp_find_unused_parameters: None
  • ddp_bucket_cap_mb: None
  • ddp_broadcast_buffers: False
  • dataloader_pin_memory: True
  • dataloader_persistent_workers: False
  • skip_memory_metrics: True
  • use_legacy_prediction_loop: False
  • push_to_hub: False
  • resume_from_checkpoint: None
  • hub_model_id: None
  • hub_strategy: every_save
  • hub_private_repo: False
  • hub_always_push: False
  • gradient_checkpointing: False
  • gradient_checkpointing_kwargs: None
  • include_inputs_for_metrics: False
  • eval_do_concat_batches: True
  • fp16_backend: auto
  • push_to_hub_model_id: None
  • push_to_hub_organization: None
  • mp_parameters:
  • auto_find_batch_size: False
  • full_determinism: False
  • torchdynamo: None
  • ray_scope: last
  • ddp_timeout: 1800
  • torch_compile: False
  • torch_compile_backend: None
  • torch_compile_mode: None
  • dispatch_batches: None
  • split_batches: None
  • include_tokens_per_second: False
  • include_num_input_tokens_seen: False
  • neftune_noise_alpha: None
  • optim_target_modules: None
  • batch_eval_metrics: False
  • batch_sampler: batch_sampler
  • multi_dataset_batch_sampler: proportional

Training Logs

Epoch Step Training Loss loss sts-dev-128_spearman_cosine sts-dev-16_spearman_cosine sts-dev-256_spearman_cosine sts-dev-32_spearman_cosine sts-dev-512_spearman_cosine sts-dev-64_spearman_cosine sts-dev-768_spearman_cosine sts-test-128_spearman_cosine sts-test-16_spearman_cosine sts-test-256_spearman_cosine sts-test-32_spearman_cosine sts-test-512_spearman_cosine sts-test-64_spearman_cosine sts-test-768_spearman_cosine
0.5917 100 30.7503 30.6172 0.8117 0.7110 0.8179 0.7457 0.8244 0.7884 0.8252 - - - - - - -
1.1834 200 30.4696 32.6422 0.7952 0.7198 0.8076 0.7491 0.8125 0.7813 0.8142 - - - - - - -
1.7751 300 29.9233 31.5469 0.8152 0.7435 0.8250 0.7737 0.8302 0.8006 0.8305 - - - - - - -
2.3669 400 29.0716 31.8088 0.8183 0.7405 0.8248 0.7758 0.8299 0.8057 0.8324 - - - - - - -
2.9586 500 28.7971 32.6032 0.8176 0.7430 0.8241 0.7777 0.8289 0.8025 0.8316 - - - - - - -
3.5503 600 27.4766 34.7911 0.8241 0.7400 0.8314 0.7730 0.8369 0.8061 0.8394 - - - - - - -
4.1420 700 27.0639 35.7418 0.8294 0.7466 0.8354 0.7784 0.8389 0.8107 0.8409 - - - - - - -
4.7337 800 26.5119 36.2014 0.8305 0.7425 0.8356 0.7806 0.8406 0.8152 0.8430 - - - - - - -
5.0 845 - - - - - - - - - 0.8645 0.8072 0.8715 0.8425 0.8761 0.8585 0.8775

Framework Versions

  • Python: 3.10.12
  • Sentence Transformers: 3.0.0
  • Transformers: 4.41.1
  • PyTorch: 2.3.0+cu121
  • Accelerate: 0.30.1
  • Datasets: 2.19.1
  • Tokenizers: 0.19.1

Citation

BibTeX

Sentence Transformers

@inproceedings{reimers-2019-sentence-bert,
    title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
    author = "Reimers, Nils and Gurevych, Iryna",
    booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
    month = "11",
    year = "2019",
    publisher = "Association for Computational Linguistics",
    url = "https://arxiv.org/abs/1908.10084",
}

MatryoshkaLoss

@misc{kusupati2024matryoshka,
    title={Matryoshka Representation Learning}, 
    author={Aditya Kusupati and Gantavya Bhatt and Aniket Rege and Matthew Wallingford and Aditya Sinha and Vivek Ramanujan and William Howard-Snyder and Kaifeng Chen and Sham Kakade and Prateek Jain and Ali Farhadi},
    year={2024},
    eprint={2205.13147},
    archivePrefix={arXiv},
    primaryClass={cs.LG}
}

CoSENTLoss

@online{kexuefm-8847,
    title={CoSENT: A more efficient sentence vector scheme than Sentence-BERT},
    author={Su Jianlin},
    year={2022},
    month={Jan},
    url={https://kexue.fm/archives/8847},
}