learning_rate=1.41e-5 batch_size=8 epochs=100
https://wandb.ai/krijnd/trl/runs/njt1yycg?workspace=user-krijndignum