Add SetFit model

Browse files

Files changed (7) hide show

README.md +50 -3
config.json +1 -1
config_setfit.json +2 -2
model.safetensors +1 -1
model_head.pkl +2 -2
tokenizer.json +14 -2
tokenizer_config.json +7 -0

README.md CHANGED Viewed

@@ -9,8 +9,6 @@ base_model: BAAI/bge-small-en-v1.5
 metrics:
 - accuracy
 widget:
-- text: Can you let me know if my claim has been approved?
-- text: Can you provide an update on the progress of my claim?
 - text: Thank you for your outreach. Currently, our priorities are focused elsewhere,
     and we are not considering new solutions. I would be open to revisiting this conversation
     in [insert timeframe, e.g., 6 months]. Please follow up then.
@@ -19,8 +17,24 @@ widget:
     a reassessment.
 - text: I recently moved to a new apartment. How can I update my address for my renter's
     insurance policy?
 pipeline_tag: text-classification
 inference: false
 ---
 # SetFit with BAAI/bge-small-en-v1.5
@@ -50,6 +64,13 @@ The model has been trained using an efficient few-shot learning technique that i
 - **Paper:** [Efficient Few-Shot Learning Without Prompts](https://arxiv.org/abs/2209.11055)
 - **Blogpost:** [SetFit: Efficient Few-Shot Learning Without Prompts](https://huggingface.co/blog/setfit)
 ## Uses
 ### Direct Use for Inference
@@ -68,7 +89,7 @@ from setfit import SetFitModel
 # Download from the 🤗 Hub
 model = SetFitModel.from_pretrained("setfit_model_id")
 # Run inference
-preds = model("Can you let me know if my claim has been approved?")
 ```
 <!--
@@ -102,6 +123,32 @@ preds = model("Can you let me know if my claim has been approved?")
 |:-------------|:----|:--------|:----|
 | Word count   | 1   | 14.3077 | 37  |
 ### Framework Versions
 - Python: 3.8.4
 - SetFit: 1.0.3

 metrics:
 - accuracy
 widget:
 - text: Thank you for your outreach. Currently, our priorities are focused elsewhere,
     and we are not considering new solutions. I would be open to revisiting this conversation
     in [insert timeframe, e.g., 6 months]. Please follow up then.
     a reassessment.
 - text: I recently moved to a new apartment. How can I update my address for my renter's
     insurance policy?
+- text: Can you provide an update on the status of my insurance claim?
+- text: I have a new mailing address. Please update it for my records.
 pipeline_tag: text-classification
 inference: false
+model-index:
+- name: SetFit with BAAI/bge-small-en-v1.5
+  results:
+  - task:
+      type: text-classification
+      name: Text Classification
+    dataset:
+      name: Unknown
+      type: unknown
+      split: test
+    metrics:
+    - type: accuracy
+      value: 0.8461538461538461
+      name: Accuracy
 ---
 # SetFit with BAAI/bge-small-en-v1.5
 - **Paper:** [Efficient Few-Shot Learning Without Prompts](https://arxiv.org/abs/2209.11055)
 - **Blogpost:** [SetFit: Efficient Few-Shot Learning Without Prompts](https://huggingface.co/blog/setfit)
+## Evaluation
+### Metrics
+| Label   | Accuracy |
+|:--------|:---------|
+| **all** | 0.8462   |
 ## Uses
 ### Direct Use for Inference
 # Download from the 🤗 Hub
 model = SetFitModel.from_pretrained("setfit_model_id")
 # Run inference
+preds = model("Can you provide an update on the status of my insurance claim?")
 ```
 <!--
 |:-------------|:----|:--------|:----|
 | Word count   | 1   | 14.3077 | 37  |
+### Training Hyperparameters
+- batch_size: (32, 32)
+- num_epochs: (1, 1)
+- max_steps: -1
+- sampling_strategy: oversampling
+- num_iterations: 0
+- body_learning_rate: (2e-05, 1e-05)
+- head_learning_rate: 0.01
+- loss: CosineSimilarityLoss
+- distance_metric: cosine_distance
+- margin: 0.25
+- end_to_end: False
+- use_amp: False
+- warmup_proportion: 0.1
+- seed: 42
+- eval_max_steps: -1
+- load_best_model_at_end: True
+### Training Results
+| Epoch   | Step   | Training Loss | Validation Loss |
+|:-------:|:------:|:-------------:|:---------------:|
+| 0.0152  | 1      | 0.2404        | -               |
+| 0.7576  | 50     | 0.0375        | -               |
+| **1.0** | **66** | **-**         | **0.0347**      |
+* The bold row denotes the saved checkpoint.
 ### Framework Versions
 - Python: 3.8.4
 - SetFit: 1.0.3

config.json CHANGED Viewed

@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "/Users/bot/.cache/torch/sentence_transformers/BAAI_bge-small-en-v1.5/",
   "architectures": [
     "BertModel"
   ],

 {
+  "_name_or_path": "checkpoints/step_66/",
   "architectures": [
     "BertModel"
   ],

config_setfit.json CHANGED Viewed

@@ -1,10 +1,10 @@
 {
   "labels": [
     "update_info",
     "claim_status",
     "coverage_info",
     "get_quote",
     "policy_renew"
-  ],
-  "normalize_embeddings": false
 }

 {
+  "normalize_embeddings": false,
   "labels": [
     "update_info",
     "claim_status",
     "coverage_info",
     "get_quote",
     "policy_renew"
+  ]
 }

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ea1d11a3f23d14fe09fc1826fc7944e89c09a634d2217d57a21dd136805ee3e8
 size 133462128

 version https://git-lfs.github.com/spec/v1
+oid sha256:161dd5df243c45251c516ab21699e4ab51a05218134b86c80fe7083d74a9c8f9
 size 133462128

model_head.pkl CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:24716f977e9161a666c516b469133170acb2d57b3f07af33ae8092d7148da28e
-size 399

 version https://git-lfs.github.com/spec/v1
+oid sha256:0ffb6d228c25fbd7acbc86f41a04b13d6098bfdd9ca996e83853e34735ea9eee
+size 18001

tokenizer.json CHANGED Viewed

@@ -1,7 +1,19 @@
 {
   "version": "1.0",
-  "truncation": null,
-  "padding": null,
   "added_tokens": [
     {
       "id": 0,

 {
   "version": "1.0",
+  "truncation": {
+    "direction": "Right",
+    "max_length": 512,
+    "strategy": "LongestFirst",
+    "stride": 0
+  },
+  "padding": {
+    "strategy": "BatchLongest",
+    "direction": "Right",
+    "pad_to_multiple_of": null,
+    "pad_id": 0,
+    "pad_type_id": 0,
+    "pad_token": "[PAD]"
+  },
   "added_tokens": [
     {
       "id": 0,

tokenizer_config.json CHANGED Viewed

@@ -46,12 +46,19 @@
   "do_basic_tokenize": true,
   "do_lower_case": true,
   "mask_token": "[MASK]",
   "model_max_length": 512,
   "never_split": null,
   "pad_token": "[PAD]",
   "sep_token": "[SEP]",
   "strip_accents": null,
   "tokenize_chinese_chars": true,
   "tokenizer_class": "BertTokenizer",
   "unk_token": "[UNK]"
 }

   "do_basic_tokenize": true,
   "do_lower_case": true,
   "mask_token": "[MASK]",
+  "max_length": 512,
   "model_max_length": 512,
   "never_split": null,
+  "pad_to_multiple_of": null,
   "pad_token": "[PAD]",
+  "pad_token_type_id": 0,
+  "padding_side": "right",
   "sep_token": "[SEP]",
+  "stride": 0,
   "strip_accents": null,
   "tokenize_chinese_chars": true,
   "tokenizer_class": "BertTokenizer",
+  "truncation_side": "right",
+  "truncation_strategy": "longest_first",
   "unk_token": "[UNK]"
 }