tangledgroup
/

tangled-llama-108m-32k-base-v0.1

Text Generation

text-generation-inference

Inference Endpoints

Model card | Files and versions | Community

mtasic85 committed on Sep 27

Commit

590516a

•

1 Parent(s): 7a40fb6

train model: tuned training

Files changed (1) hide show

scripts/model.yaml +7 -4

scripts/model.yaml CHANGED Viewed

@@ -17,7 +17,7 @@ model_config:
   parallel_residual: false
   bias: false
   norm_class_name: "RMSNorm"
-  norm_eps: 1e-05
   mlp_class_name: "LLaMAMLP"
   intermediate_size: 2048
   rope_base: 500000
@@ -88,7 +88,8 @@ train:
   max_norm: 1.0
   #   (type: float, default: 4e-05)
-  min_lr: 4.0e-05
 # Evaluation-related arguments. See ``litgpt.args.EvalArgs`` for details
 eval:
@@ -116,10 +117,12 @@ optimizer:
   init_args:
     #   (type: float, default: 0.001)
-    lr: 5e-5
     #   (type: float, default: 0.01)
-    weight_decay: 0.1
     #   (type: tuple, default: (0.9,0.999))
     betas:

   parallel_residual: false
   bias: false
   norm_class_name: "RMSNorm"
+  norm_eps: 1.0e-05
   mlp_class_name: "LLaMAMLP"
   intermediate_size: 2048
   rope_base: 500000
   max_norm: 1.0
   #   (type: float, default: 4e-05)
+  # min_lr: 4.0e-05
+  min_lr: 1.0e-3
 # Evaluation-related arguments. See ``litgpt.args.EvalArgs`` for details
 eval:
   init_args:
     #   (type: float, default: 0.001)
+    # lr: 5e-5
+    lr: 1.0e-3
     #   (type: float, default: 0.01)
+    # weight_decay: 0.1
+    weight_decay: 0.01
     #   (type: tuple, default: (0.9,0.999))
     betas: