End of training

Browse files

Files changed (14)

README.md +23 -25
config.json +1 -1
final_checkpoint/config.json +1 -1
final_checkpoint/generation_config.json +1 -1
final_checkpoint/model-00001-of-00003.safetensors +1 -1
final_checkpoint/model-00002-of-00003.safetensors +1 -1
final_checkpoint/model-00003-of-00003.safetensors +1 -1
generation_config.json +1 -1
model-00001-of-00003.safetensors +1 -1
model-00002-of-00003.safetensors +1 -1
model-00003-of-00003.safetensors +1 -1
tokenizer.json +7 -17
tokenizer_config.json +2 -2
training_args.bin +1 -1

README.md CHANGED Viewed

@@ -17,7 +17,7 @@ should probably proofread and complete it, then remove this comment. -->
 This model is a fine-tuned version of [mistralai/Mistral-7B-Instruct-v0.2](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2) on an unknown dataset.
 It achieves the following results on the evaluation set:
-- Loss: 1.9634
 ## Model description
@@ -37,11 +37,9 @@ More information needed
 The following hyperparameters were used during training:
 - learning_rate: 1e-08
-- train_batch_size: 2
 - eval_batch_size: 1
 - seed: 42
-- gradient_accumulation_steps: 2
-- total_train_batch_size: 4
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: cosine
 - lr_scheduler_warmup_steps: 100
@@ -51,31 +49,31 @@ The following hyperparameters were used during training:
 | Training Loss | Epoch  | Step | Validation Loss |
 |:-------------:|:------:|:----:|:---------------:|
-| 1.9563        | 0.0447 | 50   | 1.9699          |
-| 1.9559        | 0.0895 | 100  | 1.9696          |
-| 1.9636        | 0.1342 | 150  | 1.9675          |
-| 1.9608        | 0.1790 | 200  | 1.9666          |
-| 1.9525        | 0.2237 | 250  | 1.9654          |
-| 1.9514        | 0.2685 | 300  | 1.9645          |
-| 1.9704        | 0.3132 | 350  | 1.9644          |
-| 1.9596        | 0.3579 | 400  | 1.9639          |
-| 1.9558        | 0.4027 | 450  | 1.9641          |
-| 1.9481        | 0.4474 | 500  | 1.9635          |
-| 1.945         | 0.4922 | 550  | 1.9639          |
-| 1.9532        | 0.5369 | 600  | 1.9634          |
-| 1.955         | 0.5817 | 650  | 1.9642          |
-| 1.9589        | 0.6264 | 700  | 1.9635          |
-| 1.9638        | 0.6711 | 750  | 1.9632          |
-| 1.9679        | 0.7159 | 800  | 1.9634          |
-| 1.9484        | 0.7606 | 850  | 1.9634          |
-| 1.9593        | 0.8054 | 900  | 1.9634          |
-| 1.9598        | 0.8501 | 950  | 1.9634          |
-| 1.9584        | 0.8949 | 1000 | 1.9634          |
 ### Framework versions
-- Transformers 4.42.3
 - Pytorch 2.0.0+cu117
 - Datasets 2.20.0
 - Tokenizers 0.19.1

 This model is a fine-tuned version of [mistralai/Mistral-7B-Instruct-v0.2](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2) on an unknown dataset.
 It achieves the following results on the evaluation set:
+- Loss: 2.1332
 ## Model description
 The following hyperparameters were used during training:
 - learning_rate: 1e-08
+- train_batch_size: 1
 - eval_batch_size: 1
 - seed: 42
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: cosine
 - lr_scheduler_warmup_steps: 100
 | Training Loss | Epoch  | Step | Validation Loss |
 |:-------------:|:------:|:----:|:---------------:|
+| 2.1335        | 0.0112 | 50   | 2.1446          |
+| 2.1332        | 0.0224 | 100  | 2.1433          |
+| 2.1306        | 0.0336 | 150  | 2.1414          |
+| 2.1283        | 0.0448 | 200  | 2.1388          |
+| 2.1267        | 0.0559 | 250  | 2.1362          |
+| 2.1252        | 0.0671 | 300  | 2.1353          |
+| 2.1247        | 0.0783 | 350  | 2.1345          |
+| 2.1237        | 0.0895 | 400  | 2.1356          |
+| 2.1240        | 0.1007 | 450  | 2.1344          |
+| 2.1238        | 0.1119 | 500  | 2.1344          |
+| 2.1236        | 0.1231 | 550  | 2.1340          |
+| 2.1235        | 0.1343 | 600  | 2.1341          |
+| 2.1232        | 0.1454 | 650  | 2.1354          |
+| 2.1231        | 0.1566 | 700  | 2.1337          |
+| 2.1227        | 0.1678 | 750  | 2.1332          |
+| 2.1225        | 0.1790 | 800  | 2.1332          |
+| 2.1225        | 0.1902 | 850  | 2.1332          |
+| 2.1225        | 0.2014 | 900  | 2.1332          |
+| 2.1225        | 0.2126 | 950  | 2.1332          |
+| 2.1225        | 0.2238 | 1000 | 2.1332          |
 ### Framework versions
+- Transformers 4.42.4
 - Pytorch 2.0.0+cu117
 - Datasets 2.20.0
 - Tokenizers 0.19.1

config.json CHANGED Viewed

@@ -20,7 +20,7 @@
   "sliding_window": null,
   "tie_word_embeddings": false,
   "torch_dtype": "float16",
-  "transformers_version": "4.42.3",
   "use_cache": false,
   "vocab_size": 32000
 }

   "sliding_window": null,
   "tie_word_embeddings": false,
   "torch_dtype": "float16",
+  "transformers_version": "4.42.4",
   "use_cache": false,
   "vocab_size": 32000
 }

final_checkpoint/config.json CHANGED Viewed

@@ -20,7 +20,7 @@
   "sliding_window": null,
   "tie_word_embeddings": false,
   "torch_dtype": "float16",
-  "transformers_version": "4.42.3",
   "use_cache": false,
   "vocab_size": 32000
 }

   "sliding_window": null,
   "tie_word_embeddings": false,
   "torch_dtype": "float16",
+  "transformers_version": "4.42.4",
   "use_cache": false,
   "vocab_size": 32000
 }

final_checkpoint/generation_config.json CHANGED Viewed

@@ -2,5 +2,5 @@
   "_from_model_config": true,
   "bos_token_id": 1,
   "eos_token_id": 2,
-  "transformers_version": "4.42.3"
 }

   "_from_model_config": true,
   "bos_token_id": 1,
   "eos_token_id": 2,
+  "transformers_version": "4.42.4"
 }

final_checkpoint/model-00001-of-00003.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:493f7d3cbed8b846171dd2cfb7f8da71a66bfbd8c551b551a2e52091bcda15a5
 size 4943162240

 version https://git-lfs.github.com/spec/v1
+oid sha256:0e2532113526c3aa11b53fb2fd1ed995e44671d7a57e43cfce618f5c8d35f466
 size 4943162240

final_checkpoint/model-00002-of-00003.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:162b62ce1ec028192979e4b671badfc47e123c7510a4cc76b75e2a74762c012f
 size 4999819232

 version https://git-lfs.github.com/spec/v1
+oid sha256:6a4a1d94545a3eb8a0faee985392f96b44a8e6585bcc43e5a6d4d364fafe4797
 size 4999819232

final_checkpoint/model-00003-of-00003.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:de2da2c429b678fcea554cf120b68bd5b37e08553abbae08d8e0dc9d0812050e
 size 4540516256

 version https://git-lfs.github.com/spec/v1
+oid sha256:85ac11daef8d9cc3e1688d856eb12903a682144ffbab48cf3156aa6313de070e
 size 4540516256

generation_config.json CHANGED Viewed

@@ -2,5 +2,5 @@
   "_from_model_config": true,
   "bos_token_id": 1,
   "eos_token_id": 2,
-  "transformers_version": "4.42.3"
 }

   "_from_model_config": true,
   "bos_token_id": 1,
   "eos_token_id": 2,
+  "transformers_version": "4.42.4"
 }

model-00001-of-00003.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:493f7d3cbed8b846171dd2cfb7f8da71a66bfbd8c551b551a2e52091bcda15a5
 size 4943162240

 version https://git-lfs.github.com/spec/v1
+oid sha256:0e2532113526c3aa11b53fb2fd1ed995e44671d7a57e43cfce618f5c8d35f466
 size 4943162240

model-00002-of-00003.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:162b62ce1ec028192979e4b671badfc47e123c7510a4cc76b75e2a74762c012f
 size 4999819232

 version https://git-lfs.github.com/spec/v1
+oid sha256:6a4a1d94545a3eb8a0faee985392f96b44a8e6585bcc43e5a6d4d364fafe4797
 size 4999819232

model-00003-of-00003.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:de2da2c429b678fcea554cf120b68bd5b37e08553abbae08d8e0dc9d0812050e
 size 4540516256

 version https://git-lfs.github.com/spec/v1
+oid sha256:85ac11daef8d9cc3e1688d856eb12903a682144ffbab48cf3156aa6313de070e
 size 4540516256

tokenizer.json CHANGED Viewed

@@ -2,7 +2,7 @@
   "version": "1.0",
   "truncation": {
     "direction": "Right",
-    "max_length": 2048,
     "strategy": "LongestFirst",
     "stride": 0
   },
@@ -36,23 +36,13 @@
       "special": true
     }
   ],
-  "normalizer": {
-    "type": "Sequence",
-    "normalizers": [
-      {
-        "type": "Prepend",
-        "prepend": "▁"
-      },
-      {
-        "type": "Replace",
-        "pattern": {
-          "String": " "
-        },
-        "content": "▁"
-      }
-    ]
   },
-  "pre_tokenizer": null,
   "post_processor": {
     "type": "TemplateProcessing",
     "single": [

   "version": "1.0",
   "truncation": {
     "direction": "Right",
+    "max_length": 1024,
     "strategy": "LongestFirst",
     "stride": 0
   },
       "special": true
     }
   ],
+  "normalizer": null,
+  "pre_tokenizer": {
+    "type": "Metaspace",
+    "replacement": "▁",
+    "prepend_scheme": "first",
+    "split": false
   },
   "post_processor": {
     "type": "TemplateProcessing",
     "single": [

tokenizer_config.json CHANGED Viewed

@@ -30,10 +30,10 @@
   },
   "additional_special_tokens": [],
   "bos_token": "<s>",
-  "chat_template": "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ ' ' + message['content'] + eos_token}}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}",
   "clean_up_tokenization_spaces": false,
   "eos_token": "</s>",
-  "legacy": true,
   "model_max_length": 1000000000000000019884624838656,
   "pad_token": "</s>",
   "sp_model_kwargs": {},

   },
   "additional_special_tokens": [],
   "bos_token": "<s>",
+  "chat_template": "{%- if messages[0]['role'] == 'system' %}\n    {%- set system_message = messages[0]['content'] %}\n    {%- set loop_messages = messages[1:] %}\n{%- else %}\n    {%- set loop_messages = messages %}\n{%- endif %}\n\n{{- bos_token }}\n{%- for message in loop_messages %}\n    {%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}\n        {{- raise_exception('After the optional system message, conversation roles must alternate user/assistant/user/assistant/...') }}\n    {%- endif %}\n    {%- if message['role'] == 'user' %}\n        {%- if loop.first and system_message is defined %}\n            {{- ' [INST] ' + system_message + '\\n\\n' + message['content'] + ' [/INST]' }}\n        {%- else %}\n            {{- ' [INST] ' + message['content'] + ' [/INST]' }}\n        {%- endif %}\n    {%- elif message['role'] == 'assistant' %}\n        {{- ' ' + message['content'] + eos_token}}\n    {%- else %}\n        {{- raise_exception('Only user and assistant roles are supported, with the exception of an initial optional system message!') }}\n    {%- endif %}\n{%- endfor %}\n",
   "clean_up_tokenization_spaces": false,
   "eos_token": "</s>",
+  "legacy": false,
   "model_max_length": 1000000000000000019884624838656,
   "pad_token": "</s>",
   "sp_model_kwargs": {},

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1d4e02746be7366df08ac72ba280a7a91f9892ccf8769d5d107a4c9f43687351
 size 4667

 version https://git-lfs.github.com/spec/v1
+oid sha256:f0bb04abef36dd14aaccc5712d079dbc4e20959cf85b0cf283ad952d0f0622ed
 size 4667