Pragades committed
Commit 79f4da8
1 Parent(s): 4a0ab28

Pragades/LlaMa-3.2 instruct(Interview)
README.md CHANGED
@@ -17,8 +17,6 @@ should probably proofread and complete it, then remove this comment. -->
 # llama3.1-mini-QLoRA
 
 This model is a fine-tuned version of [meta-llama/Llama-3.2-1B](https://huggingface.co/meta-llama/Llama-3.2-1B) on an unknown dataset.
-It achieves the following results on the evaluation set:
-- Loss: 0.6375
 
 ## Model description
 
@@ -43,27 +41,19 @@ The following hyperparameters were used during training:
 - seed: 42
 - gradient_accumulation_steps: 4
 - total_train_batch_size: 32
-- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+- optimizer: Use adamw_torch with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
 - lr_scheduler_type: linear
 - lr_scheduler_warmup_ratio: 0.1
 - num_epochs: 10
 
 ### Training results
 
-| Training Loss | Epoch  | Step | Validation Loss |
-|:-------------:|:------:|:----:|:---------------:|
-| 1.9432        | 1.5810 | 100  | 1.1024          |
-| 1.049         | 3.1621 | 200  | 0.9510          |
-| 0.9297        | 4.7431 | 300  | 0.8364          |
-| 0.8233        | 6.3241 | 400  | 0.7393          |
-| 0.7348        | 7.9051 | 500  | 0.6737          |
-| 0.6697        | 9.4862 | 600  | 0.6375          |
 
 
 ### Framework versions
 
-- PEFT 0.13.0
-- Transformers 4.45.1
-- Pytorch 2.4.1+cu121
-- Datasets 3.0.1
-- Tokenizers 0.20.0
+- PEFT 0.13.2
+- Transformers 4.46.1
+- Pytorch 2.5.0+cu121
+- Datasets 3.0.2
+- Tokenizers 0.20.1
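This README update is a regenerated auto-card: it bumps the framework versions, names the optimizer explicitly, and drops the evaluation-loss line and training-results table, so the card no longer reports the 0.6375 final validation loss. The hyperparameters it lists map onto a standard Trainer configuration. Below is a minimal sketch of the corresponding TrainingArguments, an illustration rather than the author's actual script: per_device_train_batch_size=8 is inferred from total_train_batch_size 32 with 4 accumulation steps on a single device, and learning_rate and output_dir are placeholders that do not appear in the diff.

```python
# Sketch of a TrainingArguments setup matching the README's hyperparameter
# list. learning_rate and output_dir are placeholders (not in the diff).
from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir="llama3.1-mini-QLoRA",  # placeholder name
    seed=42,
    per_device_train_batch_size=8,     # 8 x 4 accumulation = 32 total (assumes one device)
    gradient_accumulation_steps=4,
    optim="adamw_torch",               # optimizer named in the updated README
    adam_beta1=0.9,
    adam_beta2=0.999,
    adam_epsilon=1e-8,
    lr_scheduler_type="linear",
    warmup_ratio=0.1,
    num_train_epochs=10,
    learning_rate=2e-4,                # placeholder; not shown in the diff
)
```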
adapter_config.json CHANGED
@@ -20,13 +20,13 @@
     "rank_pattern": {},
     "revision": null,
     "target_modules": [
-        "up_proj",
-        "v_proj",
+        "q_proj",
         "gate_proj",
         "o_proj",
         "down_proj",
+        "up_proj",
         "k_proj",
-        "q_proj"
+        "v_proj"
     ],
     "task_type": "CAUSAL_LM",
     "use_dora": false,
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e02eedc38c7733e0bf6866f30159d4af56d334cece9def32c16d11de07ffe2d4
+oid sha256:b32a0eec53b44b90909d21ffd56d16cadc60d433fae58185cf1890e4b2701669
 size 2146498720
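The weights file is stored via Git LFS, so the diff shows only a new content hash for the retrained adapter; the size (2,146,498,720 bytes) is unchanged, as expected when the LoRA shapes stay the same. A sketch of loading the adapter on top of the base model, using a hypothetical repo id inferred from the README title:

```python
# Sketch of loading the base model plus this LoRA adapter with PEFT.
# "Pragades/llama3.1-mini-QLoRA" is a hypothetical repo id; substitute
# the real adapter path or Hub id.
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-3.2-1B")
model = PeftModel.from_pretrained(base, "Pragades/llama3.1-mini-QLoRA")
tok = AutoTokenizer.from_pretrained("Pragades/llama3.1-mini-QLoRA")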
special_tokens_map.json CHANGED
@@ -7,7 +7,7 @@
     "single_word": false
   },
   "eos_token": {
-    "content": "<EOS>",
+    "content": "<|endoftext|>",
     "lstrip": false,
     "normalized": false,
     "rstrip": false,
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1659bfe2cecd29e5cfd3cd939d14c15f6d51f4d4d47973337b2f5fe87f1bfcf3
-size 17210382
+oid sha256:c91012c67bfc70b6751c108ffdcdd148d23a114efdbb18e9f639049083b79e99
+size 17210390
tokenizer_config.json CHANGED
@@ -2057,7 +2057,7 @@
     "special": true
   },
   "128257": {
-    "content": "<EOS>",
+    "content": "<|endoftext|>",
     "lstrip": false,
     "normalized": false,
     "rstrip": false,
@@ -2067,7 +2067,7 @@
   },
   "bos_token": "<|begin_of_text|>",
   "clean_up_tokenization_spaces": true,
-  "eos_token": "<EOS>",
+  "eos_token": "<|endoftext|>",
   "model_input_names": [
     "input_ids",
     "attention_mask"
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ddc834f6a858784c874c97e60198ff811ed8f6af52e5d570b602fb58ec2e71b0
-size 5432
+oid sha256:ad9b2b174b3c6205f4cb381c4d280382ffb2c0b9eb59fc1b05719e1940801c48
+size 5496
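training_args.bin is a pickled TrainingArguments object, which is why its size shifts (5432 to 5496 bytes) when the Trainer version or settings change. It can be inspected locally; since unpickling executes arbitrary code, only do this for files you trust:

```python
# Sketch: inspect the pickled TrainingArguments in training_args.bin.
# weights_only=False is required because the file is a pickled Python
# object; only load files from sources you trust.
import torch

args = torch.load("training_args.bin", weights_only=False)
print(args.optim, args.num_train_epochs, args.lr_scheduler_type)
```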