dagrodiluksha committed
Commit 1b02fb9 (1 parent: c9e11ce)

Training in progress, epoch 1

README.md CHANGED
@@ -1,7 +1,7 @@
 ---
 library_name: transformers
 license: apache-2.0
-base_model: distilbert/distilbert-base-uncased
+base_model: allenai/longformer-base-4096
 tags:
 - generated_from_trainer
 metrics:
@@ -16,9 +16,9 @@ should probably proofread and complete it, then remove this comment. -->
 
 # my_awesome_model
 
-This model is a fine-tuned version of [distilbert/distilbert-base-uncased](https://huggingface.co/distilbert/distilbert-base-uncased) on the None dataset.
+This model is a fine-tuned version of [allenai/longformer-base-4096](https://huggingface.co/allenai/longformer-base-4096) on the None dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.1555
+- Loss: 0.1556
 - Accuracy: 0.9641
 
 ## Model description
@@ -50,8 +50,8 @@ The following hyperparameters were used during training:
 
 | Training Loss | Epoch | Step | Validation Loss | Accuracy |
 |:-------------:|:-----:|:----:|:---------------:|:--------:|
-| No log | 1.0 | 42 | 0.1542 | 0.9641 |
-| No log | 2.0 | 84 | 0.1555 | 0.9641 |
+| No log | 1.0 | 42 | 0.1581 | 0.9641 |
+| No log | 2.0 | 84 | 0.1556 | 0.9641 |
 
 
 ### Framework versions
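
The card above describes a binary sentiment classifier with the NEGATIVE/POSITIVE label mapping shown in config.json below. A minimal usage sketch, assuming the repository id is `dagrodiluksha/my_awesome_model` (inferred from the committer name and the card title, so treat it as a placeholder):

```python
# Minimal sketch of loading the fine-tuned classifier described in the card above.
# The repo id is an assumption; substitute the actual Hub id of this repository.
from transformers import pipeline

classifier = pipeline(
    "text-classification",
    model="dagrodiluksha/my_awesome_model",  # assumed repo id
)

print(classifier("This movie was surprisingly good."))
# Expected shape: [{'label': 'POSITIVE', 'score': ...}], given the id2label mapping in config.json
```
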
config.json CHANGED
@@ -1,52 +1,33 @@
 {
-  "_name_or_path": "allenai/longformer-base-4096",
+  "_name_or_path": "distilbert/distilbert-base-uncased",
+  "activation": "gelu",
   "architectures": [
-    "LongformerForSequenceClassification"
+    "DistilBertForSequenceClassification"
   ],
-  "attention_mode": "longformer",
-  "attention_probs_dropout_prob": 0.1,
-  "attention_window": [
-    512,
-    512,
-    512,
-    512,
-    512,
-    512,
-    512,
-    512,
-    512,
-    512,
-    512,
-    512
-  ],
-  "bos_token_id": 0,
-  "eos_token_id": 2,
-  "gradient_checkpointing": false,
-  "hidden_act": "gelu",
-  "hidden_dropout_prob": 0.1,
-  "hidden_size": 768,
+  "attention_dropout": 0.1,
+  "dim": 768,
+  "dropout": 0.1,
+  "hidden_dim": 3072,
   "id2label": {
     "0": "NEGATIVE",
     "1": "POSITIVE"
   },
-  "ignore_attention_mask": false,
   "initializer_range": 0.02,
-  "intermediate_size": 3072,
   "label2id": {
     "NEGATIVE": 0,
     "POSITIVE": 1
   },
-  "layer_norm_eps": 1e-05,
-  "max_position_embeddings": 4098,
-  "model_type": "longformer",
-  "num_attention_heads": 12,
-  "num_hidden_layers": 12,
-  "onnx_export": false,
-  "pad_token_id": 1,
+  "max_position_embeddings": 512,
+  "model_type": "distilbert",
+  "n_heads": 12,
+  "n_layers": 6,
+  "pad_token_id": 0,
   "problem_type": "single_label_classification",
-  "sep_token_id": 2,
+  "qa_dropout": 0.1,
+  "seq_classif_dropout": 0.2,
+  "sinusoidal_pos_embds": false,
+  "tie_weights_": true,
   "torch_dtype": "float32",
   "transformers_version": "4.44.2",
-  "type_vocab_size": 1,
-  "vocab_size": 50265
+  "vocab_size": 30522
 }
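
The replacement config.json describes a DistilBertForSequenceClassification head with two labels. A sketch of how this label mapping and problem type are typically set when initializing a fine-tune from the base checkpoint; this is standard transformers usage, not code taken from this repo's training script:

```python
# Sketch: reproduce the head/label setup seen in the new config.json.
# Checkpoint name and variable names are illustrative.
from transformers import AutoConfig, AutoModelForSequenceClassification

id2label = {0: "NEGATIVE", 1: "POSITIVE"}
label2id = {"NEGATIVE": 0, "POSITIVE": 1}

config = AutoConfig.from_pretrained(
    "distilbert/distilbert-base-uncased",
    num_labels=2,
    id2label=id2label,
    label2id=label2id,
    problem_type="single_label_classification",
)
model = AutoModelForSequenceClassification.from_pretrained(
    "distilbert/distilbert-base-uncased", config=config
)
print(model.config.model_type, model.config.n_layers)  # "distilbert", 6
```
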
model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bdbf22eba9e43f584f4749d654eee94ce321b1a30d1f596b0f07a62c7d556b65
-size 594678184
+oid sha256:9fdf262b8604c5580b5f6078fb469b57ce54bb40628999da61e6cf81c9318d8e
+size 267832560
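
Only a Git LFS pointer (spec v1) is versioned for model.safetensors; the oid and size identify the actual weight blob. A small sketch, with illustrative paths, for parsing such a pointer and checking a downloaded blob against it:

```python
# Sketch: parse a Git LFS pointer file (the three lines tracked in git) and
# verify a locally downloaded blob against its recorded sha256 and size.
# Paths are illustrative.
import hashlib
from pathlib import Path

def parse_lfs_pointer(pointer_path: str) -> dict:
    fields = {}
    for line in Path(pointer_path).read_text().splitlines():
        key, _, value = line.partition(" ")
        fields[key] = value
    return {
        "oid": fields["oid"].removeprefix("sha256:"),
        "size": int(fields["size"]),
    }

def verify_blob(blob_path: str, pointer: dict) -> bool:
    data = Path(blob_path).read_bytes()
    return (
        len(data) == pointer["size"]
        and hashlib.sha256(data).hexdigest() == pointer["oid"]
    )

# pointer = parse_lfs_pointer("model.safetensors")              # pointer file from git
# print(verify_blob("downloaded/model.safetensors", pointer))   # True if intact
```
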
runs/Sep25_03-23-28_BISTECNB049/events.out.tfevents.1727214813.BISTECNB049.30644.1 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ff525a15d5a42424150665b6bde942e53e7fb394bace75f57c0a90aeef4578cc
+size 5648
runs/Sep25_08-34-30_BISTECNB049/events.out.tfevents.1727233476.BISTECNB049.110772.0 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3f7a8a617bece2fd3e60948b9f04c2a9e40e8c63e276358c78b25fb0f5039dc6
+size 5186
runs/Sep25_08-37-43_BISTECNB049/events.out.tfevents.1727233665.BISTECNB049.24204.0 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8a97c214bd77dd8d316fe196cb9310bbed61a219785cd659a362a500e2595576
+size 6168
runs/Sep25_13-07-41_BISTECNB049/events.out.tfevents.1727249863.BISTECNB049.24204.1 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0a8a168cf706ff86f6f4121f8f3d4be85ca555d5d61c9ea01998d0bf45b2bd35
+size 5331
runs/Sep25_14-16-53_BISTECNB049/events.out.tfevents.1727254018.BISTECNB049.19804.0 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:72fb659fea06a0984d940631256fe0165dae644a6332d2dc814e67ca23acae87
+size 5233
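
The added runs/*/events.out.tfevents.* files are TensorBoard event logs written during training. A sketch for reading one of them back; the path comes from the diff above, while the scalar tag names follow the usual Trainer conventions and should be treated as assumptions:

```python
# Sketch: read eval metrics back out of one of the TensorBoard event files added above.
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

ea = EventAccumulator(
    "runs/Sep25_14-16-53_BISTECNB049/events.out.tfevents.1727254018.BISTECNB049.19804.0"
)
ea.Reload()

print(ea.Tags()["scalars"])            # list the scalar tags actually logged
for event in ea.Scalars("eval/loss"):  # assumed tag name
    print(event.step, event.value)
```
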
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:61a94ce1268ead8118ce3c25a474e730b1a26b25885b79f78ad3fbcee9c72320
+oid sha256:2198a0cfed5eb21b17ac62ccf9af66a6366a769ab139dfea809c88e7fe3a02c0
 size 5176
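
training_args.bin is the serialized TrainingArguments object saved alongside the checkpoint. A sketch for inspecting it, assuming it was written with torch.save as the Trainer normally does; loading requires full unpickling, so only do this for files you trust:

```python
# Sketch: inspect the hyperparameters stored in training_args.bin.
import torch

training_args = torch.load("training_args.bin", weights_only=False)
print(type(training_args).__name__)                              # e.g. TrainingArguments
print(training_args.num_train_epochs, training_args.learning_rate)
```
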