dagrodiluksha committed
Commit 1b02fb9 (1 parent: c9e11ce)

Training in progress, epoch 1

README.md CHANGED
@@ -1,7 +1,7 @@
 ---
 library_name: transformers
 license: apache-2.0
-base_model: distilbert/distilbert-base-uncased
+base_model: allenai/longformer-base-4096
 tags:
 - generated_from_trainer
 metrics:
@@ -16,9 +16,9 @@ should probably proofread and complete it, then remove this comment. -->
 
 # my_awesome_model
 
-This model is a fine-tuned version of [distilbert/distilbert-base-uncased](https://huggingface.co/distilbert/distilbert-base-uncased) on the None dataset.
+This model is a fine-tuned version of [allenai/longformer-base-4096](https://huggingface.co/allenai/longformer-base-4096) on the None dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.1555
+- Loss: 0.1556
 - Accuracy: 0.9641
 
 ## Model description
@@ -50,8 +50,8 @@ The following hyperparameters were used during training:
 
 | Training Loss | Epoch | Step | Validation Loss | Accuracy |
 |:-------------:|:-----:|:----:|:---------------:|:--------:|
-| No log | 1.0 | 42 | 0.1542 | 0.9641 |
-| No log | 2.0 | 84 | 0.1555 | 0.9641 |
+| No log | 1.0 | 42 | 0.1581 | 0.9641 |
+| No log | 2.0 | 84 | 0.1556 | 0.9641 |
 
 
 ### Framework versions
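
The card above describes a binary sentiment classifier with the NEGATIVE/POSITIVE label mapping shown in config.json below. A minimal usage sketch, assuming the repository id is `dagrodiluksha/my_awesome_model` (inferred from the committer name and the card title, so treat it as a placeholder):

```python
# Minimal sketch of loading the fine-tuned classifier described in the card above.
# The repo id is an assumption; substitute the actual Hub id of this repository.
from transformers import pipeline

classifier = pipeline(
    "text-classification",
    model="dagrodiluksha/my_awesome_model",  # assumed repo id
)

print(classifier("This movie was surprisingly good."))
# Expected shape: [{'label': 'POSITIVE', 'score': ...}], given the id2label mapping in config.json
```
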
config.json CHANGED
@@ -1,52 +1,33 @@
 {
-  "_name_or_path": "allenai/longformer-base-4096",
+  "_name_or_path": "distilbert/distilbert-base-uncased",
+  "activation": "gelu",
   "architectures": [
-    "LongformerForSequenceClassification"
+    "DistilBertForSequenceClassification"
   ],
-  "attention_mode": "longformer",
-  "attention_probs_dropout_prob": 0.1,
-  "attention_window": [
-    512,
-    512,
-    512,
-    512,
-    512,
-    512,
-    512,
-    512,
-    512,
-    512,
-    512,
-    512
-  ],
-  "bos_token_id": 0,
-  "eos_token_id": 2,
-  "gradient_checkpointing": false,
-  "hidden_act": "gelu",
-  "hidden_dropout_prob": 0.1,
-  "hidden_size": 768,
+  "attention_dropout": 0.1,
+  "dim": 768,
+  "dropout": 0.1,
+  "hidden_dim": 3072,
   "id2label": {
     "0": "NEGATIVE",
     "1": "POSITIVE"
   },
-  "ignore_attention_mask": false,
   "initializer_range": 0.02,
-  "intermediate_size": 3072,
   "label2id": {
     "NEGATIVE": 0,
     "POSITIVE": 1
   },
-  "layer_norm_eps": 1e-05,
-  "max_position_embeddings": 4098,
-  "model_type": "longformer",
-  "num_attention_heads": 12,
-  "num_hidden_layers": 12,
-  "onnx_export": false,
-  "pad_token_id": 1,
+  "max_position_embeddings": 512,
+  "model_type": "distilbert",
+  "n_heads": 12,
+  "n_layers": 6,
+  "pad_token_id": 0,
   "problem_type": "single_label_classification",
-  "sep_token_id": 2,
+  "qa_dropout": 0.1,
+  "seq_classif_dropout": 0.2,
+  "sinusoidal_pos_embds": false,
+  "tie_weights_": true,
   "torch_dtype": "float32",
   "transformers_version": "4.44.2",
-  "type_vocab_size": 1,
-  "vocab_size": 50265
+  "vocab_size": 30522
 }
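
The replacement config.json describes a DistilBertForSequenceClassification head with two labels. A sketch of how this label mapping and problem type are typically set when initializing a fine-tune from the base checkpoint; this is standard transformers usage, not code taken from this repo's training script:

```python
# Sketch: reproduce the head/label setup seen in the new config.json.
# Checkpoint name and variable names are illustrative.
from transformers import AutoConfig, AutoModelForSequenceClassification

id2label = {0: "NEGATIVE", 1: "POSITIVE"}
label2id = {"NEGATIVE": 0, "POSITIVE": 1}

config = AutoConfig.from_pretrained(
    "distilbert/distilbert-base-uncased",
    num_labels=2,
    id2label=id2label,
    label2id=label2id,
    problem_type="single_label_classification",
)
model = AutoModelForSequenceClassification.from_pretrained(
    "distilbert/distilbert-base-uncased", config=config
)
print(model.config.model_type, model.config.n_layers)  # "distilbert", 6
```
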
model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bdbf22eba9e43f584f4749d654eee94ce321b1a30d1f596b0f07a62c7d556b65
-size 594678184
+oid sha256:9fdf262b8604c5580b5f6078fb469b57ce54bb40628999da61e6cf81c9318d8e
+size 267832560
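
Only a Git LFS pointer (spec v1) is versioned for model.safetensors; the oid and size identify the actual weight blob. A small sketch, with illustrative paths, for parsing such a pointer and checking a downloaded blob against it:

```python
# Sketch: parse a Git LFS pointer file (the three lines tracked in git) and
# verify a locally downloaded blob against its recorded sha256 and size.
# Paths are illustrative.
import hashlib
from pathlib import Path

def parse_lfs_pointer(pointer_path: str) -> dict:
    fields = {}
    for line in Path(pointer_path).read_text().splitlines():
        key, _, value = line.partition(" ")
        fields[key] = value
    return {
        "oid": fields["oid"].removeprefix("sha256:"),
        "size": int(fields["size"]),
    }

def verify_blob(blob_path: str, pointer: dict) -> bool:
    data = Path(blob_path).read_bytes()
    return (
        len(data) == pointer["size"]
        and hashlib.sha256(data).hexdigest() == pointer["oid"]
    )

# pointer = parse_lfs_pointer("model.safetensors")              # pointer file from git
# print(verify_blob("downloaded/model.safetensors", pointer))   # True if intact
```
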
runs/Sep25_03-23-28_BISTECNB049/events.out.tfevents.1727214813.BISTECNB049.30644.1 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ff525a15d5a42424150665b6bde942e53e7fb394bace75f57c0a90aeef4578cc
+size 5648
runs/Sep25_08-34-30_BISTECNB049/events.out.tfevents.1727233476.BISTECNB049.110772.0 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3f7a8a617bece2fd3e60948b9f04c2a9e40e8c63e276358c78b25fb0f5039dc6
+size 5186
runs/Sep25_08-37-43_BISTECNB049/events.out.tfevents.1727233665.BISTECNB049.24204.0 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8a97c214bd77dd8d316fe196cb9310bbed61a219785cd659a362a500e2595576
+size 6168
runs/Sep25_13-07-41_BISTECNB049/events.out.tfevents.1727249863.BISTECNB049.24204.1 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0a8a168cf706ff86f6f4121f8f3d4be85ca555d5d61c9ea01998d0bf45b2bd35
+size 5331
runs/Sep25_14-16-53_BISTECNB049/events.out.tfevents.1727254018.BISTECNB049.19804.0 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:72fb659fea06a0984d940631256fe0165dae644a6332d2dc814e67ca23acae87
+size 5233
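
The added runs/*/events.out.tfevents.* files are TensorBoard event logs written during training. A sketch for reading one of them back; the path comes from the diff above, while the scalar tag names follow the usual Trainer conventions and should be treated as assumptions:

```python
# Sketch: read eval metrics back out of one of the TensorBoard event files added above.
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

ea = EventAccumulator(
    "runs/Sep25_14-16-53_BISTECNB049/events.out.tfevents.1727254018.BISTECNB049.19804.0"
)
ea.Reload()

print(ea.Tags()["scalars"])            # list the scalar tags actually logged
for event in ea.Scalars("eval/loss"):  # assumed tag name
    print(event.step, event.value)
```
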
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:61a94ce1268ead8118ce3c25a474e730b1a26b25885b79f78ad3fbcee9c72320
+oid sha256:2198a0cfed5eb21b17ac62ccf9af66a6366a769ab139dfea809c88e7fe3a02c0
 size 5176
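
training_args.bin is the serialized TrainingArguments object saved alongside the checkpoint. A sketch for inspecting it, assuming it was written with torch.save as the Trainer normally does; loading requires full unpickling, so only do this for files you trust:

```python
# Sketch: inspect the hyperparameters stored in training_args.bin.
import torch

training_args = torch.load("training_args.bin", weights_only=False)
print(type(training_args).__name__)                              # e.g. TrainingArguments
print(training_args.num_train_epochs, training_args.learning_rate)
```
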