princeton-nlp committed
Commit 088ed5d • 1 Parent(s): 1b0c0ab
Update README.md
README.md CHANGED
@@ -110,6 +110,8 @@ Training was done using the [alignment-handbook](https://github.com/huggingface/
 
 ## Citation
 
+gemma model:
+```
 @article{gemma_2024,
   title={Gemma},
   url={https://www.kaggle.com/m/3301},
@@ -118,24 +120,34 @@ Training was done using the [alignment-handbook](https://github.com/huggingface/
   author={Gemma Team},
   year={2024}
 }
+```
 
+SimPO paper:
+```
 @article{meng2024simpo,
   title={{SimPO}: Simple preference optimization with a reference-free reward},
   author={Meng, Yu and Xia, Mengzhou and Chen, Danqi},
   journal={arXiv preprint arXiv:2405.14734},
   year={2024}
 }
+```
 
+UltraFeedback paper:
+```
 @article{cui2023ultrafeedback,
   title={{UltraFeedback}: Boosting language models with high-quality feedback},
   author={Cui, Ganqu and Yuan, Lifan and Ding, Ning and Yao, Guanming and Zhu, Wei and Ni, Yuan and Xie, Guotong and Liu, Zhiyuan and Sun, Maosong},
   journal={arXiv preprint arXiv:2310.01377},
   year={2023}
 }
+```
 
+ArmoRM paper:
+```
 @article{wang2024interpretable,
   title={Interpretable Preferences via Multi-Objective Reward Modeling and Mixture-of-Experts},
   author={Wang, Haoxiang and Xiong, Wei and Xie, Tengyang and Zhao, Han and Zhang, Tong},
   journal={arXiv preprint arXiv:2406.12845},
   year={2024}
 }
+```