Initial Commit

Browse files

Files changed (5) hide show

README.md +78 -0
config.json +45 -0
eval_results_cardiff.json +1 -0
pytorch_model.bin +3 -0
training_args.bin +3 -0

README.md ADDED Viewed

	@@ -0,0 +1,78 @@

+---
+license: mit
+base_model: haryoaw/scenario-MDBT-TCR_data-en-cardiff_eng_only
+tags:
+- generated_from_trainer
+metrics:
+- accuracy
+- f1
+model-index:
+- name: scenario-KD-PO-CDF-EN-FROM-EN-D2_data-en-cardiff_eng_only55
+  results: []
+---
+<!-- This model card has been generated automatically according to the information the Trainer had access to. You
+should probably proofread and complete it, then remove this comment. -->
+# scenario-KD-PO-CDF-EN-FROM-EN-D2_data-en-cardiff_eng_only55
+This model is a fine-tuned version of [haryoaw/scenario-MDBT-TCR_data-en-cardiff_eng_only](https://huggingface.co/haryoaw/scenario-MDBT-TCR_data-en-cardiff_eng_only) on an unspecified dataset (the dataset name was not recorded by the Trainer).
+It achieves the following results on the evaluation set (final evaluation, step 1700 / epoch 29.31):
+- Loss: 23.6899
+- Accuracy: 0.4665
+- F1: 0.4662
+## Model description
+More information needed
+## Intended uses & limitations
+More information needed
+## Training and evaluation data
+More information needed
+## Training procedure
+### Training hyperparameters
+The following hyperparameters were used during training:
+- learning_rate: 5e-05
+- train_batch_size: 32
+- eval_batch_size: 32
+- seed: 55
+- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+- lr_scheduler_type: linear
+- num_epochs: 30
+### Training results
+| Training Loss | Epoch | Step | Validation Loss | Accuracy | F1     |
+|:-------------:|:-----:|:----:|:---------------:|:--------:|:------:|
+| No log        | 1.72  | 100  | 15.5667         | 0.4356   | 0.4288 |
+| No log        | 3.45  | 200  | 17.1164         | 0.4418   | 0.4096 |
+| No log        | 5.17  | 300  | 18.8679         | 0.4634   | 0.4606 |
+| No log        | 6.9   | 400  | 19.9135         | 0.4550   | 0.4494 |
+| 9.9963        | 8.62  | 500  | 23.0517         | 0.4581   | 0.4517 |
+| 9.9963        | 10.34 | 600  | 21.4184         | 0.4493   | 0.4394 |
+| 9.9963        | 12.07 | 700  | 22.8898         | 0.4621   | 0.4584 |
+| 9.9963        | 13.79 | 800  | 22.6673         | 0.4462   | 0.4352 |
+| 9.9963        | 15.52 | 900  | 23.8054         | 0.4616   | 0.4605 |
+| 1.7937        | 17.24 | 1000 | 23.0995         | 0.4586   | 0.4524 |
+| 1.7937        | 18.97 | 1100 | 23.2337         | 0.4709   | 0.4682 |
+| 1.7937        | 20.69 | 1200 | 24.9664         | 0.4669   | 0.4646 |
+| 1.7937        | 22.41 | 1300 | 23.8143         | 0.4700   | 0.4695 |
+| 1.7937        | 24.14 | 1400 | 23.9374         | 0.4581   | 0.4546 |
+| 0.6046        | 25.86 | 1500 | 24.0218         | 0.4647   | 0.4651 |
+| 0.6046        | 27.59 | 1600 | 23.0812         | 0.4740   | 0.4735 |
+| 0.6046        | 29.31 | 1700 | 23.6899         | 0.4665   | 0.4662 |
+### Framework versions
+- Transformers 4.33.3
+- Pytorch 2.1.1+cu121
+- Datasets 2.14.5
+- Tokenizers 0.13.3

config.json ADDED Viewed

	@@ -0,0 +1,45 @@

+{
+  "_name_or_path": "haryoaw/scenario-MDBT-TCR_data-en-cardiff_eng_only",
+  "architectures": [
+    "DebertaForSequenceClassificationKD"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "id2label": {
+    "0": "LABEL_0",
+    "1": "LABEL_1",
+    "2": "LABEL_2"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "label2id": {
+    "LABEL_0": 0,
+    "LABEL_1": 1,
+    "LABEL_2": 2
+  },
+  "layer_norm_eps": 1e-07,
+  "max_position_embeddings": 512,
+  "max_relative_positions": -1,
+  "model_type": "deberta-v2",
+  "norm_rel_ebd": "layer_norm",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 6,
+  "pad_token_id": 0,
+  "pooler_dropout": 0,
+  "pooler_hidden_act": "gelu",
+  "pooler_hidden_size": 768,
+  "pos_att_type": [
+    "p2c",
+    "c2p"
+  ],
+  "position_biased_input": false,
+  "position_buckets": 256,
+  "relative_attention": true,
+  "share_att_key": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.33.3",
+  "type_vocab_size": 0,
+  "vocab_size": 251000
+}

eval_results_cardiff.json ADDED Viewed

	@@ -0,0 +1 @@

+ {"arabic": {"f1": 0.42492992441923727, "accuracy": 0.43103448275862066, "confusion_matrix": [[153, 111, 26], [103, 140, 47], [88, 120, 82]]}, "english": {"f1": 0.5900880089025401, "accuracy": 0.5942528735632184, "confusion_matrix": [[227, 49, 14], [107, 149, 34], [48, 101, 141]]}, "french": {"f1": 0.42559459296084867, "accuracy": 0.44482758620689655, "confusion_matrix": [[129, 137, 24], [57, 195, 38], [70, 157, 63]]}, "german": {"f1": 0.5448570948217012, "accuracy": 0.5448275862068965, "confusion_matrix": [[134, 105, 51], [61, 178, 51], [36, 92, 162]]}, "hindi": {"f1": 0.430874499006803, "accuracy": 0.44022988505747124, "confusion_matrix": [[81, 99, 110], [53, 133, 104], [36, 85, 169]]}, "italian": {"f1": 0.4981809961579979, "accuracy": 0.5, "confusion_matrix": [[117, 102, 71], [32, 175, 83], [42, 105, 143]]}, "portuguese": {"f1": 0.41834557959085594, "accuracy": 0.42298850574712643, "confusion_matrix": [[92, 144, 54], [74, 166, 50], [49, 131, 110]]}, "spanish": {"f1": 0.5131262662617195, "accuracy": 0.5149425287356322, "confusion_matrix": [[148, 94, 48], [89, 117, 84], [38, 69, 183]]}, "all": {"f1": 0.4962851958962926, "accuracy": 0.4961206896551724, "confusion_matrix": [[1099, 812, 409], [536, 1303, 481], [422, 847, 1051]]}}

pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:64525f2574b3efd73fa810667cdc4a20fa9ceea7ce0e0c44d5b913e80bc8a37f
+size 946740394

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f2f4193cc3ed6e52f240815a8a101ca698d12a25bcf9d67400033baeba570fb2
+size 4600