End of training

Browse files

Files changed (8) hide show

README.md +110 -0
config.json +35 -0
model.safetensors +3 -0
special_tokens_map.json +7 -0
tokenizer.json +0 -0
tokenizer_config.json +58 -0
training_args.bin +3 -0
vocab.txt +0 -0

README.md ADDED Viewed

	@@ -0,0 +1,110 @@

+---
+license: apache-2.0
+base_model: distilbert-base-uncased-finetuned-sst-2-english
+tags:
+- generated_from_trainer
+metrics:
+- f1
+- accuracy
+model-index:
+- name: results
+  results: []
+---
+<!-- This model card has been generated automatically according to the information the Trainer had access to. You
+should probably proofread and complete it, then remove this comment. -->
+# results
+This model is a fine-tuned version of [distilbert-base-uncased-finetuned-sst-2-english](https://huggingface.co/distilbert-base-uncased-finetuned-sst-2-english) on an unspecified dataset (no dataset name was recorded by the Trainer).
+It achieves the following results on the evaluation set (values from the last logged evaluation, step 4800, epoch 5.19):
+- Loss: 0.3782
+- F1: 0.9100
+- Accuracy: 0.9231
+## Model description
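+A DistilBERT sequence-classification model (`DistilBertForSequenceClassification`: 6 layers, 12 attention heads, hidden size 768) with two output labels, `NEGATIVE` and `POSITIVE`. More information needed on the task and data it was fine-tuned for.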
+## Intended uses & limitations
+More information needed
+## Training and evaluation data
+More information needed
+## Training procedure
+### Training hyperparameters
+The following hyperparameters were used during training:
+- learning_rate: 2e-05
+- train_batch_size: 16
+- eval_batch_size: 16
+- seed: 42
+- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+- lr_scheduler_type: linear
+- lr_scheduler_warmup_steps: 100
+- num_epochs: 6
+### Training results
+| Training Loss | Epoch | Step | Validation Loss | F1     | Accuracy |
+|:-------------:|:-----:|:----:|:---------------:|:------:|:--------:|
+| 1.2718        | 0.11  | 100  | 0.5156          | 0.7112 | 0.7683   |
+| 0.4449        | 0.22  | 200  | 0.4094          | 0.7815 | 0.8252   |
+| 0.4407        | 0.32  | 300  | 0.3970          | 0.7928 | 0.8311   |
+| 0.4038        | 0.43  | 400  | 0.3991          | 0.8117 | 0.8379   |
+| 0.3772        | 0.54  | 500  | 0.3750          | 0.8191 | 0.8514   |
+| 0.3692        | 0.65  | 600  | 0.3737          | 0.8245 | 0.8547   |
+| 0.3738        | 0.76  | 700  | 0.3595          | 0.8194 | 0.8590   |
+| 0.3685        | 0.87  | 800  | 0.3409          | 0.8305 | 0.8631   |
+| 0.3286        | 0.97  | 900  | 0.3504          | 0.8372 | 0.8696   |
+| 0.3202        | 1.08  | 1000 | 0.3590          | 0.8344 | 0.8671   |
+| 0.2702        | 1.19  | 1100 | 0.3706          | 0.8473 | 0.8701   |
+| 0.2564        | 1.3   | 1200 | 0.3850          | 0.8449 | 0.8663   |
+| 0.2742        | 1.41  | 1300 | 0.3205          | 0.8558 | 0.8828   |
+| 0.2371        | 1.52  | 1400 | 0.3324          | 0.8646 | 0.8877   |
+| 0.2459        | 1.62  | 1500 | 0.3327          | 0.8602 | 0.8863   |
+| 0.2388        | 1.73  | 1600 | 0.3498          | 0.8679 | 0.8893   |
+| 0.2327        | 1.84  | 1700 | 0.3387          | 0.8735 | 0.8915   |
+| 0.244         | 1.95  | 1800 | 0.3381          | 0.8767 | 0.8953   |
+| 0.2096        | 2.06  | 1900 | 0.3312          | 0.8831 | 0.9034   |
+| 0.1719        | 2.16  | 2000 | 0.3358          | 0.8854 | 0.9039   |
+| 0.1507        | 2.27  | 2100 | 0.3580          | 0.8811 | 0.9020   |
+| 0.1704        | 2.38  | 2200 | 0.3440          | 0.8711 | 0.8861   |
+| 0.1526        | 2.49  | 2300 | 0.3728          | 0.8920 | 0.9093   |
+| 0.1913        | 2.6   | 2400 | 0.3450          | 0.8838 | 0.9034   |
+| 0.1313        | 2.71  | 2500 | 0.3746          | 0.8937 | 0.9104   |
+| 0.1719        | 2.81  | 2600 | 0.3204          | 0.8925 | 0.9093   |
+| 0.1719        | 2.92  | 2700 | 0.3073          | 0.8967 | 0.9145   |
+| 0.139         | 3.03  | 2800 | 0.3435          | 0.9035 | 0.9191   |
+| 0.1035        | 3.14  | 2900 | 0.3613          | 0.8959 | 0.9104   |
+| 0.1112        | 3.25  | 3000 | 0.3500          | 0.9038 | 0.9185   |
+| 0.1134        | 3.35  | 3100 | 0.3263          | 0.9065 | 0.9210   |
+| 0.1177        | 3.46  | 3200 | 0.3370          | 0.9050 | 0.9194   |
+| 0.1022        | 3.57  | 3300 | 0.3668          | 0.9038 | 0.9194   |
+| 0.1036        | 3.68  | 3400 | 0.3655          | 0.9034 | 0.9194   |
+| 0.1165        | 3.79  | 3500 | 0.3422          | 0.9069 | 0.9215   |
+| 0.1056        | 3.9   | 3600 | 0.3874          | 0.9082 | 0.9218   |
+| 0.1006        | 4.0   | 3700 | 0.3852          | 0.8943 | 0.9074   |
+| 0.0774        | 4.11  | 3800 | 0.3722          | 0.9086 | 0.9226   |
+| 0.0755        | 4.22  | 3900 | 0.3772          | 0.9087 | 0.9229   |
+| 0.0762        | 4.33  | 4000 | 0.3917          | 0.9059 | 0.9212   |
+| 0.0891        | 4.44  | 4100 | 0.3657          | 0.9078 | 0.9231   |
+| 0.0767        | 4.55  | 4200 | 0.3678          | 0.9101 | 0.9242   |
+| 0.0755        | 4.65  | 4300 | 0.3850          | 0.9095 | 0.9231   |
+| 0.0765        | 4.76  | 4400 | 0.3846          | 0.9084 | 0.9234   |
+| 0.1069        | 4.87  | 4500 | 0.3706          | 0.9109 | 0.9250   |
+| 0.0884        | 4.98  | 4600 | 0.3583          | 0.9067 | 0.9204   |
+| 0.0751        | 5.09  | 4700 | 0.3770          | 0.9087 | 0.9231   |
+| 0.0708        | 5.19  | 4800 | 0.3782          | 0.9100 | 0.9231   |
+### Framework versions
+- Transformers 4.36.2
+- Pytorch 2.1.0+cu121
+- Datasets 2.16.1
+- Tokenizers 0.15.0

config.json ADDED Viewed

	@@ -0,0 +1,35 @@

+{
+  "_name_or_path": "distilbert-base-uncased-finetuned-sst-2-english",
+  "activation": "gelu",
+  "architectures": [
+    "DistilBertForSequenceClassification"
+  ],
+  "attention_dropout": 0.1,
+  "dim": 768,
+  "dropout": 0.1,
+  "finetuning_task": "sst-2",
+  "hidden_dim": 3072,
+  "id2label": {
+    "0": "NEGATIVE",
+    "1": "POSITIVE"
+  },
+  "initializer_range": 0.02,
+  "label2id": {
+    "NEGATIVE": 0,
+    "POSITIVE": 1
+  },
+  "max_position_embeddings": 512,
+  "model_type": "distilbert",
+  "n_heads": 12,
+  "n_layers": 6,
+  "output_past": true,
+  "pad_token_id": 0,
+  "problem_type": "single_label_classification",
+  "qa_dropout": 0.1,
+  "seq_classif_dropout": 0.2,
+  "sinusoidal_pos_embds": false,
+  "tie_weights_": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.36.2",
+  "vocab_size": 30522
+}

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5d6b90b507a0140693fda7aebabf900e6772627e6593c348fd80f7bb220f2c44
+size 267832560

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,58 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_basic_tokenize": true,
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "never_split": null,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "DistilBertTokenizer",
+  "truncation": true,
+  "unk_token": "[UNK]"
+}

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ebc137b24157c01a71b50d8d9cd6822d4d389c178034c614649b2bdb10add5d9
+size 4600

vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff