Add SGPT-2.7B-weightedmean-nli-bitfit

Browse files

Files changed (14) hide show

1_Pooling/config.json +9 -0
README.md +89 -0
config.json +82 -0
config_sentence_transformers.json +7 -0
eval/similarity_evaluation_sts-dev_results.csv +12 -0
merges.txt +0 -0
modules.json +14 -0
pytorch_model.bin +3 -0
sentence_bert_config.json +4 -0
similarity_evaluation_sts-test_results.csv +2 -0
special_tokens_map.json +1 -0
tokenizer.json +0 -0
tokenizer_config.json +1 -0
vocab.json +0 -0

1_Pooling/config.json ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+  "word_embedding_dimension": 2560,
+  "pooling_mode_cls_token": false,
+  "pooling_mode_mean_tokens": false,
+  "pooling_mode_max_tokens": false,
+  "pooling_mode_mean_sqrt_len_tokens": false,
+  "pooling_mode_weightedmean_tokens": true,
+  "pooling_mode_lasttoken": false
+}

README.md ADDED Viewed

	@@ -0,0 +1,89 @@

+---
+pipeline_tag: sentence-similarity
+tags:
+- sentence-transformers
+- feature-extraction
+- sentence-similarity
+---
+# SGPT-2.7B-weightedmean-nli-bitfit
+This is a [sentence-transformers](https://www.SBERT.net) model: it maps sentences and paragraphs to a 2560-dimensional dense vector space and can be used for tasks like clustering or semantic search.
+<!--- Describe your model here -->
+## Usage (Sentence-Transformers)
+Using this model becomes easy when you have [sentence-transformers](https://www.SBERT.net) installed:
+```
+pip install -U sentence-transformers
+```
+Then you can use the model like this:
+```python
+from sentence_transformers import SentenceTransformer
+sentences = ["This is an example sentence", "Each sentence is converted"]
+model = SentenceTransformer('{MODEL_NAME}')
+embeddings = model.encode(sentences)
+print(embeddings)
+```
+## Evaluation Results
+<!--- Describe how your model was evaluated -->
+For an automated evaluation of this model, see the *Sentence Embeddings Benchmark*: [https://seb.sbert.net](https://seb.sbert.net?model_name={MODEL_NAME})
+## Training
+The model was trained with the parameters:
+**DataLoader**:
+`sentence_transformers.datasets.NoDuplicatesDataLoader.NoDuplicatesDataLoader` of length 70456 with parameters:
+```
+{'batch_size': 8}
+```
+**Loss**:
+`sentence_transformers.losses.MultipleNegativesRankingLoss.MultipleNegativesRankingLoss` with parameters:
+  ```
+  {'scale': 20.0, 'similarity_fct': 'cos_sim'}
+  ```
+Parameters of the `fit()` method:
+```
+{
+    "epochs": 1,
+    "evaluation_steps": 7045,
+    "evaluator": "sentence_transformers.evaluation.EmbeddingSimilarityEvaluator.EmbeddingSimilarityEvaluator",
+    "max_grad_norm": 1,
+    "optimizer_class": "<class 'transformers.optimization.AdamW'>",
+    "optimizer_params": {
+        "lr": 0.0002
+    },
+    "scheduler": "WarmupLinear",
+    "steps_per_epoch": null,
+    "warmup_steps": 7046,
+    "weight_decay": 0.01
+}
+```
+## Full Model Architecture
+```
+SentenceTransformer(
+  (0): Transformer({'max_seq_length': 75, 'do_lower_case': False}) with Transformer model: GPTNeoModel
+  (1): Pooling({'word_embedding_dimension': 2560, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': False, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': True, 'pooling_mode_lasttoken': False})
+)
+```
+## Citing & Authors
+<!--- Describe where people can find more information -->

config.json ADDED Viewed

	@@ -0,0 +1,82 @@

+{
+  "_name_or_path": "EleutherAI/gpt-neo-2.7B",
+  "activation_function": "gelu_new",
+  "architectures": [
+    "GPTNeoModel"
+  ],
+  "attention_dropout": 0,
+  "attention_layers": [
+    "global",
+    "local",
+    "global",
+    "local",
+    "global",
+    "local",
+    "global",
+    "local",
+    "global",
+    "local",
+    "global",
+    "local",
+    "global",
+    "local",
+    "global",
+    "local",
+    "global",
+    "local",
+    "global",
+    "local",
+    "global",
+    "local",
+    "global",
+    "local",
+    "global",
+    "local",
+    "global",
+    "local",
+    "global",
+    "local",
+    "global",
+    "local"
+  ],
+  "attention_types": [
+    [
+      [
+        "global",
+        "local"
+      ],
+      16
+    ]
+  ],
+  "bos_token_id": 50256,
+  "embed_dropout": 0,
+  "eos_token_id": 50256,
+  "gradient_checkpointing": false,
+  "hidden_size": 2560,
+  "initializer_range": 0.02,
+  "intermediate_size": null,
+  "layer_norm_epsilon": 1e-05,
+  "max_position_embeddings": 2048,
+  "model_type": "gpt_neo",
+  "num_heads": 20,
+  "num_layers": 32,
+  "resid_dropout": 0,
+  "summary_activation": null,
+  "summary_first_dropout": 0.1,
+  "summary_proj_to_labels": true,
+  "summary_type": "cls_index",
+  "summary_use_proj": true,
+  "task_specific_params": {
+    "text-generation": {
+      "do_sample": true,
+      "max_length": 50,
+      "temperature": 0.9
+    }
+  },
+  "tokenizer_class": "GPT2Tokenizer",
+  "torch_dtype": "float32",
+  "transformers_version": "4.11.3",
+  "use_cache": true,
+  "vocab_size": 50257,
+  "window_size": 256
+}

config_sentence_transformers.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "__version__": {
+    "sentence_transformers": "2.1.0",
+    "transformers": "4.11.3",
+    "pytorch": "1.10.1"
+  }
+}

eval/similarity_evaluation_sts-dev_results.csv ADDED Viewed

	@@ -0,0 +1,12 @@

+epoch,steps,cosine_pearson,cosine_spearman,euclidean_pearson,euclidean_spearman,manhattan_pearson,manhattan_spearman,dot_pearson,dot_spearman
+0,7045,0.854762523109346,0.8625722411777832,0.8559952110984044,0.8599071798553728,0.8562325179051519,0.8607011270217957,0.7604255385773144,0.7606474726533847
+0,14090,0.862071545726209,0.8686263495658821,0.8587162825998957,0.8617412291621442,0.8592342866823668,0.8627125269846087,0.782391853949178,0.7829935432149258
+0,21135,0.861147560133928,0.8673147899430655,0.8565176946586014,0.859934895176966,0.8562610749976107,0.8599692261614817,0.767074455302903,0.7695279716565803
+0,28180,0.8633257964426606,0.8706412335168616,0.8552663992944554,0.8599920464619075,0.8540030273419198,0.8592715940080606,0.7791548165962676,0.7857437212669921
+0,35225,0.8571401068053901,0.8634311296708672,0.8509601042154958,0.8549359573084144,0.8507577553572114,0.8553929614415839,0.7642286538174881,0.770790070391215
+0,42270,0.8598605654357643,0.8650105143335562,0.8511585774583015,0.8554589632664334,0.8505706583139645,0.8553814756685433,0.7617624805084298,0.7702625967180601
+0,49315,0.8573338952704139,0.8609427481847676,0.842483574189439,0.8467522455218017,0.8416057486283591,0.8466172561049204,0.7626709741843098,0.772898390349653
+0,56360,0.8621834500026332,0.8670699694896324,0.8505378356088411,0.8545156885949555,0.8496468004499398,0.854239264916894,0.7677057311830797,0.7762473321595961
+0,63405,0.8591097675730657,0.8630995653851116,0.8446734640853417,0.8496054770256773,0.8437993624528112,0.8492440088050155,0.7627356339474485,0.7727980219530748
+0,70450,0.8598307992062442,0.864221237988858,0.84629527410727,0.8513689337571044,0.845389605407747,0.8508766807487939,0.7613761802820678,0.7708455277248111
+0,-1,0.8598065664641443,0.8642014942030082,0.8462916558850146,0.8513545689785207,0.8453849685801835,0.8508523604473343,0.7613656467412337,0.7707829013471281

merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

modules.json ADDED Viewed

	@@ -0,0 +1,14 @@

+[
+  {
+    "idx": 0,
+    "name": "0",
+    "path": "",
+    "type": "sentence_transformers.models.Transformer"
+  },
+  {
+    "idx": 1,
+    "name": "1",
+    "path": "1_Pooling",
+    "type": "sentence_transformers.models.Pooling"
+  }
+]

pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:97d3fa68460291ddf0b431b9cf0beb67ea22dac2a356a732447e7f74388ce61b
+size 10739623849

sentence_bert_config.json ADDED Viewed

	@@ -0,0 +1,4 @@

+{
+  "max_seq_length": 75,
+  "do_lower_case": false
+}

similarity_evaluation_sts-test_results.csv ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ epoch,steps,cosine_pearson,cosine_spearman,euclidean_pearson,euclidean_spearman,manhattan_pearson,manhattan_spearman,dot_pearson,dot_spearman
2	+ -1,-1,0.8421195813886659,0.8544705934133663,0.8331975612681435,0.8365254010816424,0.8308796371031103,0.8350842363910842,0.7416086337173262,0.730738158975577

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"bos_token": "<|endoftext|>", "eos_token": "<|endoftext|>", "unk_token": "<|endoftext|>", "pad_token": "<|endoftext|>"}

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1 @@

+ {"unk_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "bos_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "eos_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "add_prefix_space": false, "model_max_length": 2048, "special_tokens_map_file": null, "name_or_path": "EleutherAI/gpt-neo-2.7B", "errors": "replace", "tokenizer_class": "GPT2Tokenizer"}

vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff