ani-kavle committed on
Commit
8b73ff5
•
1 Parent(s): 69bffd1

Training in progress, epoch 1

Browse files
adapter_config.json CHANGED
@@ -20,13 +20,13 @@
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
23
- "gate_proj",
24
- "q_proj",
25
- "up_proj",
26
  "v_proj",
 
 
 
27
  "down_proj",
28
- "o_proj",
29
- "k_proj"
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
 
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
23
+ "o_proj",
 
 
24
  "v_proj",
25
+ "k_proj",
26
+ "up_proj",
27
+ "gate_proj",
28
  "down_proj",
29
+ "q_proj"
 
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ab56799a8e4f4184f545a18a9d929ca9d284b1cf31eee370fe8e8c9295452e61
3
  size 319876032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:32912e9c3b14c4fc18ccacf07685b08f6cf9ab08db6c76a0f8fd647da4ce3435
3
  size 319876032
config.json CHANGED
@@ -19,12 +19,27 @@
19
  "num_hidden_layers": 32,
20
  "num_key_value_heads": 32,
21
  "pretraining_tp": 1,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
  "rms_norm_eps": 1e-05,
23
  "rope_scaling": null,
24
  "rope_theta": 1000000,
25
  "tie_word_embeddings": false,
26
- "torch_dtype": "float16",
27
  "transformers_version": "4.45.0",
28
- "use_cache": true,
29
  "vocab_size": 32016
30
  }
 
19
  "num_hidden_layers": 32,
20
  "num_key_value_heads": 32,
21
  "pretraining_tp": 1,
22
+ "quantization_config": {
23
+ "_load_in_4bit": true,
24
+ "_load_in_8bit": false,
25
+ "bnb_4bit_compute_dtype": "bfloat16",
26
+ "bnb_4bit_quant_storage": "bfloat16",
27
+ "bnb_4bit_quant_type": "nf4",
28
+ "bnb_4bit_use_double_quant": true,
29
+ "llm_int8_enable_fp32_cpu_offload": false,
30
+ "llm_int8_has_fp16_weight": false,
31
+ "llm_int8_skip_modules": null,
32
+ "llm_int8_threshold": 6.0,
33
+ "load_in_4bit": true,
34
+ "load_in_8bit": false,
35
+ "quant_method": "bitsandbytes"
36
+ },
37
  "rms_norm_eps": 1e-05,
38
  "rope_scaling": null,
39
  "rope_theta": 1000000,
40
  "tie_word_embeddings": false,
41
+ "torch_dtype": "bfloat16",
42
  "transformers_version": "4.45.0",
43
+ "use_cache": false,
44
  "vocab_size": 32016
45
  }
special_tokens_map.json CHANGED
@@ -1,10 +1,4 @@
1
  {
2
- "additional_special_tokens": [
3
- "▁<PRE>",
4
- "▁<MID>",
5
- "▁<SUF>",
6
- "▁<EOT>"
7
- ],
8
  "bos_token": {
9
  "content": "<s>",
10
  "lstrip": false,
@@ -19,6 +13,7 @@
19
  "rstrip": false,
20
  "single_word": false
21
  },
 
22
  "unk_token": {
23
  "content": "<unk>",
24
  "lstrip": false,
 
1
  {
 
 
 
 
 
 
2
  "bos_token": {
3
  "content": "<s>",
4
  "lstrip": false,
 
13
  "rstrip": false,
14
  "single_word": false
15
  },
16
+ "pad_token": "</s>",
17
  "unk_token": {
18
  "content": "<unk>",
19
  "lstrip": false,
tokenizer_config.json CHANGED
@@ -1,6 +1,7 @@
1
  {
2
  "add_bos_token": true,
3
  "add_eos_token": false,
 
4
  "added_tokens_decoder": {
5
  "0": {
6
  "content": "<unk>",
@@ -25,59 +26,18 @@
25
  "rstrip": false,
26
  "single_word": false,
27
  "special": true
28
- },
29
- "32007": {
30
- "content": "▁<PRE>",
31
- "lstrip": false,
32
- "normalized": false,
33
- "rstrip": false,
34
- "single_word": false,
35
- "special": true
36
- },
37
- "32008": {
38
- "content": "▁<SUF>",
39
- "lstrip": false,
40
- "normalized": false,
41
- "rstrip": false,
42
- "single_word": false,
43
- "special": true
44
- },
45
- "32009": {
46
- "content": "▁<MID>",
47
- "lstrip": false,
48
- "normalized": false,
49
- "rstrip": false,
50
- "single_word": false,
51
- "special": true
52
- },
53
- "32010": {
54
- "content": "▁<EOT>",
55
- "lstrip": false,
56
- "normalized": false,
57
- "rstrip": false,
58
- "single_word": false,
59
- "special": true
60
  }
61
  },
62
- "additional_special_tokens": [
63
- "▁<PRE>",
64
- "▁<MID>",
65
- "▁<SUF>",
66
- "▁<EOT>"
67
- ],
68
  "bos_token": "<s>",
69
  "clean_up_tokenization_spaces": false,
70
  "eos_token": "</s>",
71
- "eot_token": "▁<EOT>",
72
- "fill_token": "<FILL_ME>",
73
- "legacy": null,
74
- "middle_token": "▁<MID>",
75
  "model_max_length": 1000000000000000019884624838656,
76
- "pad_token": null,
77
- "prefix_token": "▁<PRE>",
78
  "sp_model_kwargs": {},
79
- "suffix_token": "▁<SUF>",
80
- "tokenizer_class": "CodeLlamaTokenizer",
81
  "unk_token": "<unk>",
82
- "use_default_system_prompt": false
 
83
  }
 
1
  {
2
  "add_bos_token": true,
3
  "add_eos_token": false,
4
+ "add_prefix_space": true,
5
  "added_tokens_decoder": {
6
  "0": {
7
  "content": "<unk>",
 
26
  "rstrip": false,
27
  "single_word": false,
28
  "special": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  }
30
  },
 
 
 
 
 
 
31
  "bos_token": "<s>",
32
  "clean_up_tokenization_spaces": false,
33
  "eos_token": "</s>",
34
+ "legacy": true,
 
 
 
35
  "model_max_length": 1000000000000000019884624838656,
36
+ "pad_token": "</s>",
 
37
  "sp_model_kwargs": {},
38
+ "spaces_between_special_tokens": false,
39
+ "tokenizer_class": "LlamaTokenizer",
40
  "unk_token": "<unk>",
41
+ "use_default_system_prompt": false,
42
+ "use_fast": true
43
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ababd2183db88b03b47b303e09f5c4cd2470040848d593341c6948d45dd9ab8d
3
  size 6136
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d3845973994f2cd9dae1ddfb30ae6677578a6f72de7723d87db38bcde4d10bc2
3
  size 6136