Model save
- README.md +51 -83
- adapter_config.json +5 -5
- adapter_model.bin +1 -1
- adapter_model.safetensors +1 -1
- config.json +1 -1
- tokenizer_config.json +1 -1
- training_args.bin +2 -2
README.md
CHANGED
@@ -2,7 +2,6 @@
 license: apache-2.0
 library_name: peft
 tags:
-- axolotl
 - generated_from_trainer
 base_model: mistralai/Mistral-7B-Instruct-v0.2
 model-index:
@@ -18,94 +17,74 @@ should probably proofread and complete it, then remove this comment. -->
 
 axolotl version: `0.4.0`
 ```yaml
+adam_beta2: 0.95
+adam_epsilon: 1.0e-05
+adapter: lora
 base_model: mistralai/Mistral-7B-Instruct-v0.2
-tokenizer_type: LlamaTokenizer
-is_mistral_derived_model: true
-load_in_8bit: false
-load_in_4bit: false
-strict: false
+bf16: auto
 chat_template: inst
-datasets:
-  - path: ./data/raw_format/tool_used_training.jsonl
-    type: sharegpt
-  - path: ./data/raw_format/tool_not_used_training.jsonl
-    type: sharegpt
-  - path: ./data/raw_format/no_tools_training.jsonl
-    type: sharegpt
 dataset_prepared_path: last_run_prepared
+datasets:
+- conversation: mistral
+  path: 4e9501d816a24795b7d619faea6fe0b7/./data/raw_format/tool_used_training_small.jsonl
+  type: sharegpt
+debug: null
+deepspeed: null
+early_stopping_patience: null
+eval_max_new_tokens: 256
+eval_steps: 0.2
+eval_table_size: null
+flash_attention: true
+fp16: null
+fsdp: null
+fsdp_config: null
+gradient_accumulation_steps: 4
+gradient_checkpointing: true
+group_by_length: false
+hub_model_id: liuylhf/mistral-lora
+is_mistral_derived_model: true
+learning_rate: 0.001
+load_in_4bit: false
+load_in_8bit: false
+local_rank: null
+logging_steps: 1
 lora_alpha: 16
 lora_dropout: 0.1
+lora_fan_in_fan_out: null
+lora_model_dir: null
+lora_r: 16
 lora_target_linear: true
-hub_model_id: liuylhf/mistral-lora
-wandb_project: function-call
-wandb_name: mixtral-instruct-qlora-v1
-wandb_log_model: end
-gradient_accumulation_steps: 4
+lr_scheduler: cosine
+max_grad_norm: 1.0
 micro_batch_size: 2
+model_type: AutoModelForCausalLM
 num_epochs: 2
 optimizer: paged_adamw_8bit
-group_by_length: false
-bf16: auto
-fp16:
+output_dir: ../../text-generation-webui/loras/mistral-instruct-raw-format-v2-more-positive-inst
+pad_to_sequence_len: true
+resume_from_checkpoint: null
+sample_packing: true
+save_steps: 0.2
+sequence_len: 4096
+strict: false
 tf32: false
-xformers_attention:
-flash_attention: true
-# loss_watchdog_threshold: 5.0
-# loss_watchdog_patience: 3
+tokenizer_type: LlamaTokenizer
+train_on_inputs: false
+val_set_size: 0.1
+wandb_log_model: end
+wandb_name: mixtral-instruct-qlora-v1
+wandb_project: function-call
 warmup_steps: 10
-# evals_per_epoch: 20
-eval_steps: 0.2
-save_steps: 0.2
-eval_table_size:
-eval_max_new_tokens: 256
-# saves_per_epoch: 1
-debug:
-deepspeed:
 weight_decay: 1.0
+xformers_attention: null
+
 ```
 
 </details><br>
 
 # mistral-lora
 
-This model is a fine-tuned version of [mistralai/Mistral-7B-Instruct-v0.2](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2) on
-It achieves the following results on the evaluation set:
-- Loss: 0.1520
+This model is a fine-tuned version of [mistralai/Mistral-7B-Instruct-v0.2](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2) on an unknown dataset.
 
 ## Model description
 
@@ -138,21 +117,10 @@ The following hyperparameters were used during training:
 - lr_scheduler_warmup_steps: 10
 - num_epochs: 2
 
-### Training results
-
-| Training Loss | Epoch | Step | Validation Loss |
-|:-------------:|:-----:|:----:|:---------------:|
-| 1.3787        | 0.0   | 1    | 1.4156          |
-| 0.1825        | 0.4   | 122  | 0.1627          |
-| 0.1398        | 0.8   | 244  | 0.1575          |
-| 0.0862        | 1.19  | 366  | 0.1562          |
-| 0.0868        | 1.59  | 488  | 0.1520          |
-
 ### Framework versions
 
 - PEFT 0.8.2
-- Transformers 4.
+- Transformers 4.39.0.dev0
 - Pytorch 2.2.0+cu121
 - Datasets 2.17.1
 - Tokenizers 0.15.0
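The config above fine-tunes `mistralai/Mistral-7B-Instruct-v0.2` and pushes the adapter to `liuylhf/mistral-lora`. A minimal sketch of loading that adapter with PEFT, assuming versions compatible with the "Framework versions" list above (this is not code from the repository itself):

```python
# Sketch: attach the LoRA adapter from liuylhf/mistral-lora to its base model.
# Assumes peft/transformers releases compatible with the versions listed above;
# adjust dtype and device placement to your hardware.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base_id = "mistralai/Mistral-7B-Instruct-v0.2"
adapter_id = "liuylhf/mistral-lora"

tokenizer = AutoTokenizer.from_pretrained(base_id)
base = AutoModelForCausalLM.from_pretrained(base_id, torch_dtype=torch.bfloat16)
model = PeftModel.from_pretrained(base, adapter_id)  # loads the saved adapter weights

prompt = "[INST] List the tools you can call. [/INST]"
inputs = tokenizer(prompt, return_tensors="pt")
out = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(out[0], skip_special_tokens=True))
```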
adapter_config.json
CHANGED
@@ -19,13 +19,13 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "
+    "gate_proj",
     "down_proj",
-    "
-    "v_proj",
+    "o_proj",
     "q_proj",
-    "
-    "
+    "k_proj",
+    "v_proj",
+    "up_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_rslora": false
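For reference, the `target_modules` list above combined with the `lora_r` / `lora_alpha` / `lora_dropout` values from the axolotl config corresponds roughly to the following PEFT `LoraConfig`; this is a sketch for illustration, and the `adapter_config.json` in the repository remains the authoritative definition:

```python
# Sketch of an equivalent peft.LoraConfig; values taken from adapter_config.json
# and the axolotl YAML above, not generated by the repository's own code.
from peft import LoraConfig

lora_config = LoraConfig(
    r=16,            # lora_r in the axolotl config
    lora_alpha=16,
    lora_dropout=0.1,
    target_modules=[
        "gate_proj", "down_proj", "o_proj",
        "q_proj", "k_proj", "v_proj", "up_proj",
    ],
    task_type="CAUSAL_LM",
)
```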
adapter_model.bin
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:dc4c49f4231047910e5b89dd68dce81cbac2f8438273133924b43ce4d7b32bd4
 size 84047946
adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:e5e53c2bbc490a6f3bf85f23c969433fb56992e73dcd5ac2b9489a7ffb199196
 size 83946192
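Both weight files are stored as Git LFS pointers: the text above records only the sha256 oid and byte size of the real payload. A small sketch for checking a locally downloaded copy against its pointer (file path is illustrative):

```python
# Sketch: compare a downloaded file's sha256 with the oid in its LFS pointer.
import hashlib

def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

expected = "e5e53c2bbc490a6f3bf85f23c969433fb56992e73dcd5ac2b9489a7ffb199196"
print(sha256_of("adapter_model.safetensors") == expected)
```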
config.json
CHANGED
@@ -20,7 +20,7 @@
   "sliding_window": null,
   "tie_word_embeddings": false,
   "torch_dtype": "bfloat16",
-  "transformers_version": "4.
+  "transformers_version": "4.39.0.dev0",
   "use_cache": false,
   "vocab_size": 32000
 }
tokenizer_config.json
CHANGED
@@ -1,6 +1,7 @@
 {
   "add_bos_token": true,
   "add_eos_token": false,
+  "add_prefix_space": true,
   "added_tokens_decoder": {
     "0": {
       "content": "<unk>",
@@ -38,7 +39,6 @@
   "sp_model_kwargs": {},
   "spaces_between_special_tokens": false,
   "tokenizer_class": "LlamaTokenizer",
-  "trust_remote_code": false,
   "unk_token": "<unk>",
   "use_default_system_prompt": false,
   "use_fast": true
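The tokenizer keeps `add_bos_token` true and `add_eos_token` false. A quick, hedged check, assuming the full tokenizer files are present in the repository alongside this config:

```python
# Sketch: confirm the BOS/EOS behaviour declared in tokenizer_config.json.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("liuylhf/mistral-lora")
ids = tok("Hello").input_ids
print(ids[0] == tok.bos_token_id)   # expect True  (add_bos_token: true)
print(ids[-1] == tok.eos_token_id)  # expect False (add_eos_token: false)
```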
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:4132915575eea232b6b96893db1644b5e5e781de46620bde99e6ca920cfad17e
+size 5752
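`training_args.bin` is the pickled training-arguments object saved by the Hugging Face Trainer. A sketch of inspecting it, assuming the PyTorch 2.2.0 build listed in the README (newer PyTorch releases may require `weights_only=False`):

```python
# Sketch: inspect the pickled TrainingArguments stored in training_args.bin.
import torch

args = torch.load("training_args.bin")
print(type(args).__name__)                        # TrainingArguments (or a subclass)
print(args.learning_rate, args.num_train_epochs)  # should match the config above
```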