aaabiao committed on May 7

Commit

b015ed2

•

1 Parent(s): ea00e3d

Upload folder using huggingface_hub

Browse files

Files changed (24) hide show

README.md +54 -0
all_results.json +8 -0
model-00001-of-00019.safetensors +3 -0
model-00002-of-00019.safetensors +3 -0
model-00003-of-00019.safetensors +3 -0
model-00004-of-00019.safetensors +3 -0
model-00005-of-00019.safetensors +3 -0
model-00006-of-00019.safetensors +3 -0
model-00007-of-00019.safetensors +3 -0
model-00008-of-00019.safetensors +3 -0
model-00009-of-00019.safetensors +3 -0
model-00010-of-00019.safetensors +3 -0
model-00011-of-00019.safetensors +3 -0
model-00012-of-00019.safetensors +3 -0
model-00013-of-00019.safetensors +3 -0
model-00014-of-00019.safetensors +3 -0
model-00015-of-00019.safetensors +3 -0
model-00016-of-00019.safetensors +3 -0
model-00017-of-00019.safetensors +3 -0
model-00018-of-00019.safetensors +3 -0
model-00019-of-00019.safetensors +3 -0
train_results.json +8 -0
trainer_state.json +0 -0
training_args.bin +3 -0

README.md ADDED Viewed

	@@ -0,0 +1,54 @@

+---
+license: other
+tags:
+- llama-factory
+- full
+- generated_from_trainer
+model-index:
+- name: scaleup_STEM_merged_10M_MOE_sft_0428_256
+  results: []
+---
+<!-- This model card has been generated automatically according to the information the Trainer had access to. You
+should probably proofread and complete it, then remove this comment. -->
+## Model description
+More information needed
+## Intended uses & limitations
+More information needed
+## Training and evaluation data
+More information needed
+## Training procedure
+### Training hyperparameters
+The following hyperparameters were used during training:
+- learning_rate: 5e-06
+- train_batch_size: 4
+- eval_batch_size: 8
+- seed: 42
+- distributed_type: multi-GPU
+- num_devices: 256
+- total_train_batch_size: 1024
+- total_eval_batch_size: 2048
+- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+- lr_scheduler_type: cosine
+- lr_scheduler_warmup_ratio: 0.05
+- num_epochs: 3.0
+### Training results
+### Framework versions
+- Transformers 4.40.0
+- PyTorch 2.3.0a0+40ec155e58.nv24.03
+- Datasets 2.18.0
+- Tokenizers 0.19.1

all_results.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "epoch": 3.0,
+    "total_flos": 4.271915416263066e+16,
+    "train_loss": 0.3954192769085303,
+    "train_runtime": 93126.1025,
+    "train_samples_per_second": 70.83,
+    "train_steps_per_second": 0.069
+}

model-00001-of-00019.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a5e52a34a2bbaf221fa35a10e5b52ef022fe32d45373d0664347ddd2fa221836
+size 4892809584

model-00002-of-00019.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c0d33ea27253324c37d83c13a84779bfdbb78f08676c53e7af6fe1daef0a2820
+size 4983004016

model-00003-of-00019.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c659d265df682f0c04f74417b454577055a96408dd2c299e8ff8ebef9e8fe814
+size 4983004016

model-00004-of-00019.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2c3cf01845abde048c2e09f3f58993b472b4dc6fe6b13e0657ad438e0d04fb41
+size 4899035200

model-00005-of-00019.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7d4de45bf4528839cfce148cc578f9f5f5f3cf627e29ee0060c9102a73ddecec
+size 4983004016

model-00006-of-00019.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c8705e4049624af211290d6d92870409fe9dc8524f97b480eafd234437cc5c99
+size 4983004016

model-00007-of-00019.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fbc56f0029e4a923e7cd23377073b5f7dae62a3e65b536736a77cb05e8a77cce
+size 4899035248

model-00008-of-00019.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3b0a037d215e9403d731182400eee370c48379cba2313b0c2798f374aed6bb76
+size 4983004072

model-00009-of-00019.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:749a46ea190874a99bfdb88abb59a05ba567046adbd66540ac24a699b4c88e5e
+size 4983004072

model-00010-of-00019.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8945749230c93f506d22dcf0c27b25976346eb143ebe9071acffdcc5dc5257e1
+size 4899035248

model-00011-of-00019.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:01069c00a98ad1cdc711e823b961471584c3fbaf5b5c867fcc7be7f79b6f3f0e
+size 4983004072

model-00012-of-00019.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7c88b407b327438b6bfa7489ec33102d7988b4c03935033b6ff2bcc001a10089
+size 4983004072

model-00013-of-00019.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cb7da155beb3cc6a8d12b2ad8c5988f84d2afc62ab29f940e6d702a59dbbfd84
+size 4983004072

model-00014-of-00019.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3d81012ed05e2945ca6519eed4da07f39869b0d8c6a41da49dbd24a5522fd552
+size 4899035248

model-00015-of-00019.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f4f6a0e1b984d69d25823dab2fcff806d2a99787c4ffeb22e3d2dd05bfe923b6
+size 4983004072

model-00016-of-00019.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b762c941e33ad82c04e0b14e4ecf5249f05e9d9323d56e058f3cd25651741b2a
+size 4983004072

model-00017-of-00019.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:de252cd0286e7882d9bd3e737373dc00baad61309786dd43b5f2cf2867a6c6d6
+size 4899035248

model-00018-of-00019.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e1e224c6ad506c91a2b4e5752c04334a17f7a4eb17703adea2a0ce2c7c52bc0d
+size 4983004072

model-00019-of-00019.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ad58f83d0dd4d67228c85cbe1f354cb0c2b7a91b7e80a3c375c4356b0eb73c70
+size 4221679088

train_results.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "epoch": 3.0,
+    "total_flos": 4.271915416263066e+16,
+    "train_loss": 0.3954192769085303,
+    "train_runtime": 93126.1025,
+    "train_samples_per_second": 70.83,
+    "train_steps_per_second": 0.069
+}

trainer_state.json ADDED Viewed

The diff for this file is too large to render. See raw diff

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c2bbf7579ab389ada54152ffdc3c45fda213695f93e668f3c5f733a613f44767
+size 6904