princeton-nlp
/

gemma-2-9b-it-SimPO

Text Generation

alignment-handbook

Generated from Trainer

text-generation-inference

Inference Endpoints

Model card Files Files and versions Community

princeton-nlp committed on Jul 16

Commit

088ed5d

•

1 Parent(s): 1b0c0ab

Update README.md

Files changed (1) hide show

README.md +12 -0

README.md CHANGED Viewed

@@ -110,6 +110,8 @@ Training was done using the [alignment-handbook](https://github.com/huggingface/
 ## Citation
 @article{gemma_2024,
     title={Gemma},
     url={https://www.kaggle.com/m/3301},
@@ -118,24 +120,34 @@ Training was done using the [alignment-handbook](https://github.com/huggingface/
     author={Gemma Team},
     year={2024}
 }
 @misc{meng2024simpo,
   author        = {Meng, Yu and Xia, Mengzhou and Chen, Danqi},
   title         = {{SimPO}: Simple Preference Optimization with a Reference-Free Reward},
   year          = {2024},
   eprint        = {2405.14734},
   archiveprefix = {arXiv},
   url           = {https://arxiv.org/abs/2405.14734}
 }
 @misc{cui2023ultrafeedback,
   author        = {Cui, Ganqu and Yuan, Lifan and Ding, Ning and Yao, Guanming and Zhu, Wei and Ni, Yuan and Xie, Guotong and Liu, Zhiyuan and Sun, Maosong},
   title         = {{UltraFeedback}: Boosting Language Models with High-Quality Feedback},
   year          = {2023},
   eprint        = {2310.01377},
   archiveprefix = {arXiv},
   url           = {https://arxiv.org/abs/2310.01377}
 }
 @misc{wang2024interpretable,
   author        = {Wang, Haoxiang and Xiong, Wei and Xie, Tengyang and Zhao, Han and Zhang, Tong},
   title         = {Interpretable Preferences via Multi-Objective Reward Modeling and Mixture-of-Experts},
   year          = {2024},
   eprint        = {2406.12845},
   archiveprefix = {arXiv},
   url           = {https://arxiv.org/abs/2406.12845}
 }

 ## Citation
+Gemma model:
+```
 @misc{gemma_2024,
   author = {{Gemma Team}},
   title  = {Gemma},
   year   = {2024},
   url    = {https://www.kaggle.com/m/3301}
 }
+```
+SimPO paper:
+```
 @misc{meng2024simpo,
   author        = {Meng, Yu and Xia, Mengzhou and Chen, Danqi},
   title         = {{SimPO}: Simple Preference Optimization with a Reference-Free Reward},
   year          = {2024},
   eprint        = {2405.14734},
   archiveprefix = {arXiv},
   url           = {https://arxiv.org/abs/2405.14734}
 }
+```
+UltraFeedback paper:
+```
 @misc{cui2023ultrafeedback,
   author        = {Cui, Ganqu and Yuan, Lifan and Ding, Ning and Yao, Guanming and Zhu, Wei and Ni, Yuan and Xie, Guotong and Liu, Zhiyuan and Sun, Maosong},
   title         = {{UltraFeedback}: Boosting Language Models with High-Quality Feedback},
   year          = {2023},
   eprint        = {2310.01377},
   archiveprefix = {arXiv},
   url           = {https://arxiv.org/abs/2310.01377}
 }
+```
+ArmoRM paper:
+```
 @misc{wang2024interpretable,
   author        = {Wang, Haoxiang and Xiong, Wei and Xie, Tengyang and Zhao, Han and Zhang, Tong},
   title         = {Interpretable Preferences via Multi-Objective Reward Modeling and Mixture-of-Experts},
   year          = {2024},
   eprint        = {2406.12845},
   archiveprefix = {arXiv},
   url           = {https://arxiv.org/abs/2406.12845}
 }
+```