mistral / instruct_med_dataset.hf /dataset_info.json
santuchal's picture
Upload 7 files
ff427f0 verified
{
"citation": "@article{Cohan_2018,\n title={A Discourse-Aware Attention Model for Abstractive Summarization of\n Long Documents},\n url={http://dx.doi.org/10.18653/v1/n18-2097},\n DOI={10.18653/v1/n18-2097},\n journal={Proceedings of the 2018 Conference of the North American Chapter of\n the Association for Computational Linguistics: Human Language\n Technologies, Volume 2 (Short Papers)},\n publisher={Association for Computational Linguistics},\n author={Cohan, Arman and Dernoncourt, Franck and Kim, Doo Soon and Bui, Trung and Kim, Seokhwan and Chang, Walter and Goharian, Nazli},\n year={2018}\n}",
"description": "Scientific papers datasets contains two sets of long and structured documents.\nThe datasets are obtained from ArXiv and PubMed OpenAccess repositories.\n\nBoth \"arxiv\" and \"pubmed\" have two features:\n - article: the body of the document, pagragraphs seperated by \"/n\".\n - abstract: the abstract of the document, pagragraphs seperated by \"/n\".\n - section_names: titles of sections, seperated by \"/n\".",
"features": {
"text": {
"dtype": "string",
"_type": "Value"
}
},
"homepage": "https://github.com/armancohan/long-summarization",
"license": ""
}