Update README.md
Browse files
README.md
CHANGED
@@ -52,7 +52,7 @@ Here is the table summarizing the architecture used for training, along with the
|
|
52 |
| Hyperparameter | Value |
|
53 |
|:---------------------:|:----------:|
|
54 |
| label smoothing | 0.05 |
|
55 |
-
|
|
56 |
| betas | 0.9, 0.999 |
|
57 |
| learning rate | 5e-6 |
|
58 |
| anneal strategy | cos |
|
@@ -111,6 +111,6 @@ Citation
|
|
111 |
AUTHOR = {Cyrile Delestre},
|
112 |
URL = {https://huggingface.co/cmarkea/bloomz-7b1-mt-sft-chat},
|
113 |
YEAR = {2023},
|
114 |
-
KEYWORDS = {NLP ; Transformers ; Bloomz},
|
115 |
}
|
116 |
```
|
|
|
52 |
| Hyperparameter | Value |
|
53 |
|:---------------------:|:----------:|
|
54 |
| label smoothing | 0.05 |
|
55 |
+
| optimizer | AdamW |
|
56 |
| betas | 0.9, 0.999 |
|
57 |
| learning rate | 5e-6 |
|
58 |
| anneal strategy | cos |
|
|
|
111 |
AUTHOR = {Cyrile Delestre},
|
112 |
URL = {https://huggingface.co/cmarkea/bloomz-7b1-mt-sft-chat},
|
113 |
YEAR = {2023},
|
114 |
+
KEYWORDS = {NLP ; Transformers ; LLM ; Bloomz},
|
115 |
}
|
116 |
```
|