Training in progress, step 520
Browse files
- README.md +7 -21
- adapter_model.safetensors +1 -1
README.md
CHANGED
@@ -2,11 +2,10 @@
 license: apache-2.0
 library_name: peft
 tags:
-- axolotl
 - generated_from_trainer
 base_model: mistralai/Mixtral-8x7B-Instruct-v0.1
 model-index:
-- name: empower-functions-
+- name: empower-functions-more-tools
   results: []
 ---
 
@@ -28,7 +27,7 @@ datasets:
   path: ./data/with_function_response/function_not_used_training_small.jsonl
   type: sharegpt
 - conversation: mistral
-  path: ./data/with_function_response/function_used_training_small.jsonl
+  path: ./data/with_function_response/more_functions/function_used_training_small.jsonl
   type: sharegpt
 debug: null
 eval_max_new_tokens: 256
@@ -41,7 +40,7 @@ fsdp_config: null
 gradient_accumulation_steps: 4
 gradient_checkpointing: true
 group_by_length: false
-hub_model_id: liuylhf/empower-functions-
+hub_model_id: liuylhf/empower-functions-more-tools
 learning_rate: 0.0002
 load_in_4bit: true
 load_in_8bit: false
@@ -62,7 +61,7 @@ micro_batch_size: 2
 model_config:
   output_router_logits: true
 model_type: AutoModelForCausalLM
-num_epochs:
+num_epochs: 2
 optimizer: paged_adamw_8bit
 output_dir: 2af0968cad514d6e9d5fb8448230e1c6/model
 pad_to_sequence_len: true
@@ -85,11 +84,9 @@ weight_decay: 0.0
 
 </details><br>
 
-# empower-functions-
+# empower-functions-more-tools
 
-This model is a fine-tuned version of [mistralai/Mixtral-8x7B-Instruct-v0.1](https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1) on
-It achieves the following results on the evaluation set:
-- Loss: 0.0928
+This model is a fine-tuned version of [mistralai/Mixtral-8x7B-Instruct-v0.1](https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1) on an unknown dataset.
 
 ## Model description
 
@@ -120,18 +117,7 @@ The following hyperparameters were used during training:
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: cosine
 - lr_scheduler_warmup_steps: 10
-- num_epochs:
-
-### Training results
-
-| Training Loss | Epoch | Step | Validation Loss |
-|:-------------:|:-----:|:----:|:---------------:|
-| 2.2084        | 0.01  | 1    | 2.1525          |
-| 0.0991        | 0.8   | 75   | 0.1072          |
-| 0.0883        | 1.59  | 150  | 0.0976          |
-| 0.0808        | 2.38  | 225  | 0.0940          |
-| 0.0679        | 3.16  | 300  | 0.0928          |
-
+- num_epochs: 2
 
 ### Framework versions
 
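The config above trains a 4-bit QLoRA-style PEFT adapter (load_in_4bit: true, library_name: peft) on top of Mixtral-8x7B-Instruct-v0.1 and pushes it to the hub_model_id liuylhf/empower-functions-more-tools. As a rough sketch, not part of this commit, the resulting adapter could be loaded for inference along these lines, assuming current transformers, peft, and bitsandbytes APIs:

```python
# Sketch: attach the LoRA adapter from this repo to the 4-bit quantized base model.
# Repo ids come from base_model and hub_model_id in the config above.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import PeftModel

base_id = "mistralai/Mixtral-8x7B-Instruct-v0.1"
adapter_id = "liuylhf/empower-functions-more-tools"

quant = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_compute_dtype=torch.bfloat16)
base = AutoModelForCausalLM.from_pretrained(
    base_id, quantization_config=quant, device_map="auto"
)
model = PeftModel.from_pretrained(base, adapter_id)  # applies the adapter weights
tokenizer = AutoTokenizer.from_pretrained(base_id)
```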
adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:b02b6f1d8710610647cd2407571320b616b98d42734899b1693475929cee3f9c
 size 109086416
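The adapter_model.safetensors entry is a Git LFS pointer file: the commit stores only the object's sha256 and byte size, while the roughly 109 MB adapter weights live in LFS storage. A downloaded copy can be checked against the pointer with a short script; the local path here is a placeholder:

```python
# Sketch: verify a downloaded adapter_model.safetensors against the LFS pointer above.
import hashlib
import os

path = "adapter_model.safetensors"  # placeholder for the local download location
expected_oid = "b02b6f1d8710610647cd2407571320b616b98d42734899b1693475929cee3f9c"
expected_size = 109086416

digest = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
        digest.update(chunk)

assert os.path.getsize(path) == expected_size, "size mismatch"
assert digest.hexdigest() == expected_oid, "sha256 mismatch"
print("file matches the LFS pointer")
```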