{ "citation": "@article{Cohan_2018,\n title={A Discourse-Aware Attention Model for Abstractive Summarization of\n Long Documents},\n url={http://dx.doi.org/10.18653/v1/n18-2097},\n DOI={10.18653/v1/n18-2097},\n journal={Proceedings of the 2018 Conference of the North American Chapter of\n the Association for Computational Linguistics: Human Language\n Technologies, Volume 2 (Short Papers)},\n publisher={Association for Computational Linguistics},\n author={Cohan, Arman and Dernoncourt, Franck and Kim, Doo Soon and Bui, Trung and Kim, Seokhwan and Chang, Walter and Goharian, Nazli},\n year={2018}\n}", "description": "Scientific papers datasets contain two sets of long and structured documents.\nThe datasets are obtained from ArXiv and PubMed OpenAccess repositories.\n\nBoth \"arxiv\" and \"pubmed\" have three features:\n - article: the body of the document, paragraphs separated by \"/n\".\n - abstract: the abstract of the document, paragraphs separated by \"/n\".\n - section_names: titles of sections, separated by \"/n\".", "features": { "text": { "dtype": "string", "_type": "Value" } }, "homepage": "https://github.com/armancohan/long-summarization", "license": "" }