princeton-nlp committed on
Commit
088ed5d
1 Parent(s): 1b0c0ab

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +12 -0
README.md CHANGED
@@ -110,6 +110,8 @@ Training was done using the [alignment-handbook](https://github.com/huggingface/
110
 
111
  ## Citation
112
 
 
 
113
  @article{gemma_2024,
114
  title={Gemma},
115
  url={https://www.kaggle.com/m/3301},
@@ -118,24 +120,34 @@ Training was done using the [alignment-handbook](https://github.com/huggingface/
118
  author={Gemma Team},
119
  year={2024}
120
  }
 
121
 
 
 
122
  @article{meng2024simpo,
123
  title={{SimPO}: Simple preference optimization with a reference-free reward},
124
  author={Meng, Yu and Xia, Mengzhou and Chen, Danqi},
125
  journal={arXiv preprint arXiv:2405.14734},
126
  year={2024}
127
  }
 
128
 
 
 
129
  @article{cui2023ultrafeedback,
130
  title={{UltraFeedback}: Boosting language models with high-quality feedback},
131
  author={Cui, Ganqu and Yuan, Lifan and Ding, Ning and Yao, Guanming and Zhu, Wei and Ni, Yuan and Xie, Guotong and Liu, Zhiyuan and Sun, Maosong},
132
  journal={arXiv preprint arXiv:2310.01377},
133
  year={2023}
134
  }
 
135
 
 
 
136
  @article{wang2024interpretable,
137
  title={Interpretable Preferences via Multi-Objective Reward Modeling and Mixture-of-Experts},
138
  author={Wang, Haoxiang and Xiong, Wei and Xie, Tengyang and Zhao, Han and Zhang, Tong},
139
  journal={arXiv preprint arXiv:2406.12845},
140
  year={2024}
141
  }
 
 
110
 
111
  ## Citation
112
 
113
+ Gemma model:
114
+ ```
115
  @article{gemma_2024,
116
  title={Gemma},
117
  url={https://www.kaggle.com/m/3301},
 
120
  author={Gemma Team},
121
  year={2024}
122
  }
123
+ ```
124
 
125
+ SimPO paper:
126
+ ```
127
  @article{meng2024simpo,
128
  title={{SimPO}: Simple preference optimization with a reference-free reward},
129
  author={Meng, Yu and Xia, Mengzhou and Chen, Danqi},
130
  journal={arXiv preprint arXiv:2405.14734},
131
  year={2024}
132
  }
133
+ ```
134
 
135
+ UltraFeedback paper:
136
+ ```
137
  @article{cui2023ultrafeedback,
138
  title={{UltraFeedback}: Boosting language models with high-quality feedback},
139
  author={Cui, Ganqu and Yuan, Lifan and Ding, Ning and Yao, Guanming and Zhu, Wei and Ni, Yuan and Xie, Guotong and Liu, Zhiyuan and Sun, Maosong},
140
  journal={arXiv preprint arXiv:2310.01377},
141
  year={2023}
142
  }
143
+ ```
144
 
145
+ ArmoRM paper:
146
+ ```
147
  @article{wang2024interpretable,
148
  title={Interpretable Preferences via Multi-Objective Reward Modeling and Mixture-of-Experts},
149
  author={Wang, Haoxiang and Xiong, Wei and Xie, Tengyang and Zhao, Han and Zhang, Tong},
150
  journal={arXiv preprint arXiv:2406.12845},
151
  year={2024}
152
  }
153
+ ```