IggoOnCode
First version of the mamba-2.8b-slimpj-OpenOrca_1ep model and tokenizer (copy of EleutherAI/gpt-neox-20b).
b44e736
{
  "base_model_name": "UNTRAINED/mamba-2.8b-slimpj",
  "base_model_class": "MambaSsmModel",
  "loss": 0.4871,
  "learning_rate": 1.814168657212832e-08,
  "epoch": 1.0,
  "current_steps": 1058463,
  "train_runtime": 423405.7021,
  "train_samples_per_second": 10.0,
  "train_steps_per_second": 0.078,
  "total_flos": 0.0,
  "train_loss": 0.6762700151924311
}