victormiller committed on
Commit
22b2064
1 Parent(s): 5f4285e

Update curated.py

Browse files
Files changed (1) hide show
  1. curated.py +1 -1
curated.py CHANGED
@@ -647,7 +647,7 @@ filtering_process = Div(
647
  Section(
648
  Div(
649
  H3("PubMed Central and PubMed Abstract"),
650
- P(B("Download and Extraction: "), "All files were downloaded from", A("ttps://ftp.ncbi.nlm.nih.gov/pub/pmc/oa_package/",href="ttps://ftp.ncbi.nlm.nih.gov/pub/pmc/oa_package/"),". PubMed Central (PMC) files are downloaded in an xml.tar format. The tar files are opened and converted to markdown format using pandoc", D_code("pandoc -f jats {nxml} -o {pmcid}.md", language="bash"),". The markdown files are combined to create jsonl files. PubMed Abstract (PMA) files were downloaded in xml. The BeautifulSoup library was used to extract the abstract, title, and PMID. All files were stored in jsonl format.")
651
  H4("Filtering"),
652
  P("1. Multiple filters are used here after manually verifying output of all the filters as suggested by peS2o dataset."),
653
  Ol(
 
647
  Section(
648
  Div(
649
  H3("PubMed Central and PubMed Abstract"),
650
+ P(B("Download and Extraction: "), "All files were downloaded from", A("ttps://ftp.ncbi.nlm.nih.gov/pub/pmc/oa_package/",href="ttps://ftp.ncbi.nlm.nih.gov/pub/pmc/oa_package/"),". PubMed Central (PMC) files are downloaded in an xml.tar format. The tar files are opened and converted to markdown format using pandoc", D_code("pandoc -f jats {nxml} -o {pmcid}.md", language="bash"),". The markdown files are combined to create jsonl files. PubMed Abstract (PMA) files were downloaded in xml. The BeautifulSoup library was used to extract the abstract, title, and PMID. All files were stored in jsonl format."),
651
  H4("Filtering"),
652
  P("1. Multiple filters are used here after manually verifying output of all the filters as suggested by peS2o dataset."),
653
  Ol(