talazz committed
Commit 9647375
Parent: 3a89e25

your-username/falcon_finetuned
README.md CHANGED
@@ -1,11 +1,10 @@
 ---
-license: apache-2.0
 library_name: peft
 tags:
 - trl
 - sft
 - generated_from_trainer
-base_model: mistralai/Mistral-7B-v0.1
+base_model: ybelkada/falcon-7b-sharded-bf16
 model-index:
 - name: results
   results: []
@@ -14,11 +13,15 @@ model-index:
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 should probably proofread and complete it, then remove this comment. -->
 
+[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="200" height="32"/>](https://wandb.ai/team2001/falcon/runs/awqj4mrk)
+[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="200" height="32"/>](https://wandb.ai/team2001/falcon/runs/awqj4mrk)
+[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="200" height="32"/>](https://wandb.ai/team2001/falcon/runs/lq1v9kx5)
+[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="200" height="32"/>](https://wandb.ai/team2001/falcon/runs/lq1v9kx5)
+[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="200" height="32"/>](https://wandb.ai/team2001/falcon/runs/lq1v9kx5)
+[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="200" height="32"/>](https://wandb.ai/team2001/falcon/runs/lq1v9kx5)
 # results
 
-This model is a fine-tuned version of [mistralai/Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1) on an unknown dataset.
-It achieves the following results on the evaluation set:
-- Loss: 0.3806
+This model is a fine-tuned version of [ybelkada/falcon-7b-sharded-bf16](https://huggingface.co/ybelkada/falcon-7b-sharded-bf16) on an unknown dataset.
 
 ## Model description
 
@@ -39,24 +42,24 @@
 The following hyperparameters were used during training:
 - learning_rate: 5e-05
 - train_batch_size: 4
-- eval_batch_size: 4
+- eval_batch_size: 8
 - seed: 42
+- gradient_accumulation_steps: 4
+- total_train_batch_size: 16
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: constant
-- lr_scheduler_warmup_steps: 100
-- num_epochs: 1
+- lr_scheduler_warmup_ratio: 0.03
+- num_epochs: 2
+- mixed_precision_training: Native AMP
 
 ### Training results
 
-| Training Loss | Epoch | Step | Validation Loss |
-|:-------------:|:-----:|:----:|:---------------:|
-| 0.3872        | 1.0   | 4174 | 0.3806          |
 
 
 ### Framework versions
 
-- PEFT 0.11.1.dev0
-- Transformers 4.40.2
+- PEFT 0.11.2.dev0
+- Transformers 4.41.0
 - Pytorch 2.2.1+cu121
 - Datasets 2.19.1
 - Tokenizers 0.19.1
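As a reading aid, the hyperparameters above map onto `transformers.TrainingArguments` roughly as follows. This is a minimal sketch: `output_dir` is a placeholder, dataset and model wiring are omitted, and only the values listed in the card come from this commit.

```python
from transformers import TrainingArguments

# Sketch reconstructed from the card above; only the listed values are
# taken from this commit, everything else keeps its default.
training_args = TrainingArguments(
    output_dir="results",            # placeholder
    learning_rate=5e-5,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=8,
    gradient_accumulation_steps=4,   # 4 x 4 = total_train_batch_size 16
    num_train_epochs=2,
    lr_scheduler_type="constant",
    warmup_ratio=0.03,
    seed=42,
    fp16=True,                       # "Native AMP" mixed precision
)
```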
adapter_config.json CHANGED
@@ -1,7 +1,7 @@
 {
   "alpha_pattern": {},
   "auto_mapping": null,
-  "base_model_name_or_path": "mistralai/Mistral-7B-v0.1",
+  "base_model_name_or_path": "ybelkada/falcon-7b-sharded-bf16",
   "bias": "none",
   "fan_in_fan_out": false,
   "inference_mode": true,
@@ -20,14 +20,10 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "up_proj",
-    "v_proj",
-    "q_proj",
-    "lm_head",
-    "gate_proj",
-    "down_proj",
-    "o_proj",
-    "k_proj"
+    "query_key_value",
+    "dense_h_to_4h",
+    "dense",
+    "dense_4h_to_h"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e3191c437b593a85b2422e00b5cd458c8a06e0ba0ca9d7b0c56fed5935956783
-size 694431312
+oid sha256:ea4724b2e8238c5839864e6fd5c887d6b01d57a2b987dc96ba109afc00986f2e
+size 130583912
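The adapter weights drop from roughly 694 MB to 131 MB, consistent with the move from eight Mistral target modules (including `lm_head`) to four Falcon modules. A hedged loading sketch, using the placeholder repo id from the commit message:

```python
import torch
from peft import AutoPeftModelForCausalLM

# "your-username/falcon_finetuned" is the placeholder from the commit
# message; substitute the real Hub path of this adapter repo.
model = AutoPeftModelForCausalLM.from_pretrained(
    "your-username/falcon_finetuned",
    torch_dtype=torch.bfloat16,
    device_map="auto",  # requires accelerate
)
```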
runs/May18_13-41-21_babd416a164d/events.out.tfevents.1716039780.babd416a164d.1437.0 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7e33d7a4ac7f0a1355efb607f4861d16d9a94218e286c3e8a36b3b6fb6fbfa7a
+size 5515
runs/May18_13-43-34_babd416a164d/events.out.tfevents.1716039828.babd416a164d.1437.1 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7a6b614a281e46c63df507e35337ddeed3bd10c65df1d6e0020e784f150d4b0e
+size 5514
runs/May18_13-45-46_babd416a164d/events.out.tfevents.1716040174.babd416a164d.1437.2 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3a4a33eee20246382198a5c538cd504e35ff759f8e52092883df896951619001
+size 5516
runs/May18_13-51-03_babd416a164d/events.out.tfevents.1716040273.babd416a164d.1437.3 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:aac55fac9d13f289e80e4a4c138b7809bc2dbabc2acbb971aa53d7288fb8700c
+size 5721
runs/May18_13-53-00_babd416a164d/events.out.tfevents.1716040397.babd416a164d.1437.4 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c627d54cf4343bce656d6360f94a92cfd53f3ce884cbcf35f392d05877023f01
+size 5514
runs/May18_13-53-39_babd416a164d/events.out.tfevents.1716040433.babd416a164d.1437.5 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a156aae08598e1a69ca446a8e281474844a5f6ebe3a4ea8c3f02a5d2d1cbe9f7
+size 27788
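These are Git LFS pointers for TensorBoard event logs from six short runs on the same host (`babd416a164d`); only the last (about 27 KB) looks like a run that logged many steps. After `git lfs pull`, the logs can be read with TensorBoard's event accumulator; the scalar tag names are not recorded in this commit, so list them rather than guessing:

```python
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

# Inspect the largest event file; Reload() parses the log from disk.
ea = EventAccumulator(
    "runs/May18_13-53-39_babd416a164d/events.out.tfevents.1716040433.babd416a164d.1437.5"
)
ea.Reload()
print(ea.Tags()["scalars"])  # discover which scalar series were logged
```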
special_tokens_map.json CHANGED
@@ -1,24 +1,23 @@
 {
-  "bos_token": {
-    "content": "<s>",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
+  "additional_special_tokens": [
+    ">>TITLE<<",
+    ">>ABSTRACT<<",
+    ">>INTRODUCTION<<",
+    ">>SUMMARY<<",
+    ">>COMMENT<<",
+    ">>ANSWER<<",
+    ">>QUESTION<<",
+    ">>DOMAIN<<",
+    ">>PREFIX<<",
+    ">>SUFFIX<<",
+    ">>MIDDLE<<"
+  ],
   "eos_token": {
-    "content": "</s>",
+    "content": "<|endoftext|>",
     "lstrip": false,
     "normalized": false,
     "rstrip": false,
     "single_word": false
   },
-  "pad_token": "<unk>",
-  "unk_token": {
-    "content": "<unk>",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  }
+  "pad_token": "<|endoftext|>"
 }
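The map is rewritten from Llama-style tokens (`<s>`, `</s>`, `<unk>`) to Falcon's: the eleven structured-document tokens become `additional_special_tokens`, and since Falcon defines no dedicated pad token, `<|endoftext|>` is reused for padding. The same setup in code, assuming the base model's stock tokenizer:

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("ybelkada/falcon-7b-sharded-bf16")
# Falcon ships no pad token; reuse eos, mirroring this commit's config.
tokenizer.pad_token = tokenizer.eos_token  # both "<|endoftext|>"
```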
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -1,9 +1,8 @@
 {
-  "add_bos_token": true,
-  "add_eos_token": false,
+  "add_prefix_space": false,
   "added_tokens_decoder": {
     "0": {
-      "content": "<unk>",
+      "content": ">>TITLE<<",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
@@ -11,7 +10,7 @@
       "special": true
     },
     "1": {
-      "content": "<s>",
+      "content": ">>ABSTRACT<<",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
@@ -19,7 +18,79 @@
       "special": true
     },
     "2": {
-      "content": "</s>",
+      "content": ">>INTRODUCTION<<",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": ">>SUMMARY<<",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "4": {
+      "content": ">>COMMENT<<",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "5": {
+      "content": ">>ANSWER<<",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "6": {
+      "content": ">>QUESTION<<",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "7": {
+      "content": ">>DOMAIN<<",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "8": {
+      "content": ">>PREFIX<<",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "9": {
+      "content": ">>SUFFIX<<",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "10": {
+      "content": ">>MIDDLE<<",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "11": {
+      "content": "<|endoftext|>",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
@@ -27,16 +98,22 @@
       "special": true
     }
   },
-  "additional_special_tokens": [],
-  "bos_token": "<s>",
-  "clean_up_tokenization_spaces": false,
-  "eos_token": "</s>",
-  "legacy": true,
-  "model_max_length": 1000000000000000019884624838656,
-  "pad_token": "<unk>",
-  "sp_model_kwargs": {},
-  "spaces_between_special_tokens": false,
-  "tokenizer_class": "LlamaTokenizer",
-  "unk_token": "<unk>",
-  "use_default_system_prompt": false
+  "additional_special_tokens": [
+    ">>TITLE<<",
+    ">>ABSTRACT<<",
+    ">>INTRODUCTION<<",
+    ">>SUMMARY<<",
+    ">>COMMENT<<",
+    ">>ANSWER<<",
+    ">>QUESTION<<",
+    ">>DOMAIN<<",
+    ">>PREFIX<<",
+    ">>SUFFIX<<",
+    ">>MIDDLE<<"
+  ],
+  "clean_up_tokenization_spaces": true,
+  "eos_token": "<|endoftext|>",
+  "model_max_length": 2048,
+  "pad_token": "<|endoftext|>",
+  "tokenizer_class": "PreTrainedTokenizerFast"
 }
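The rewritten config assigns ids 0–11 to the Falcon special tokens, switches to `PreTrainedTokenizerFast`, and replaces the old Llama config's effectively unbounded `model_max_length` with Falcon's 2048. A quick sanity check, again using the placeholder repo id:

```python
from transformers import AutoTokenizer

# Placeholder repo id from the commit message; a local checkout of this
# repo works identically.
tok = AutoTokenizer.from_pretrained("your-username/falcon_finetuned")
print(tok.convert_tokens_to_ids(">>QUESTION<<"))  # 6, per added_tokens_decoder
print(tok.model_max_length)                       # 2048
```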
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:213abb780a160958fe966296f84c0c08b32c4a88dbbd320063c02f2f9d92106f
-size 4984
+oid sha256:3e6dc1265cda0347e7adeec07916f41c4bcb1cb01405dae1d6aa6a77dfc24569
+size 5112
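`training_args.bin` is a pickled `TrainingArguments` object, which is why its size shifts when the argument set changes. It can be inspected directly, ideally with the same transformers version (4.41.0) that wrote it:

```python
import torch

# A plain torch.load works on the PyTorch 2.2 used here; newer PyTorch
# defaults to weights_only=True and needs weights_only=False for pickles.
args = torch.load("training_args.bin")
print(args.learning_rate, args.num_train_epochs, args.lr_scheduler_type)
```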