From 0715275f750b58d02ef9b3e8ed8973345593e951 Mon Sep 17 00:00:00 2001 From: Lewis Tunstall Date: Fri, 27 Oct 2023 08:52:54 +0000 Subject: [PATCH] Update README.md --- README.md | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/README.md b/README.md index a8d35ca..0c6a331 100644 --- a/README.md +++ b/README.md @@ -108,3 +108,31 @@ The dataset is stored in parquet format with each entry using the following sche "prompt_id": "d938b65dfe31f05f80eb8572964c6673eddbd68eff3db6bd234d7f1e3b86c2af" } ``` + +## Citation + +If you find this dataset useful in your work, please cite the original UltraChat dataset: + +``` +@misc{ding2023enhancing, + title={Enhancing Chat Language Models by Scaling High-quality Instructional Conversations}, + author={Ning Ding and Yulin Chen and Bokai Xu and Yujia Qin and Zhi Zheng and Shengding Hu and Zhiyuan Liu and Maosong Sun and Bowen Zhou}, + year={2023}, + eprint={2305.14233}, + archivePrefix={arXiv}, + primaryClass={cs.CL} +} +``` + +You may also want to cite the Zephyr 7B technical report: + +``` +@misc{tunstall2023zephyr, + title={Zephyr: Direct Distillation of LM Alignment}, + author={Lewis Tunstall and Edward Beeching and Nathan Lambert and Nazneen Rajani and Kashif Rasul and Younes Belkada and Shengyi Huang and Leandro von Werra and Clémentine Fourrier and Nathan Habib and Nathan Sarrazin and Omar Sanseviero and Alexander M. Rush and Thomas Wolf}, + year={2023}, + eprint={2310.16944}, + archivePrefix={arXiv}, + primaryClass={cs.LG} +} +``` \ No newline at end of file