Spaces:
Running
Running
victormiller
committed on
Commit
•
22b2064
1
Parent(s):
5f4285e
Update curated.py
Browse files - curated.py +1 -1
curated.py
CHANGED
@@ -647,7 +647,7 @@ filtering_process = Div(
|
|
647 |
Section(
|
648 |
Div(
|
649 |
H3("PubMed Central and PubMed Abstract"),
|
650 |
-
P(B("Download and Extraction: "), "All files were downloaded from", A("ttps://ftp.ncbi.nlm.nih.gov/pub/pmc/oa_package/",href="ttps://ftp.ncbi.nlm.nih.gov/pub/pmc/oa_package/"),". PubMed Central (PMC) files are downloaded in an xml.tar format. The tar files are opened and converted to markdown format using pandoc", D_code("pandoc -f jats {nxml} -o {pmcid}.md", language="bash"),". The markdown files are combined to create jsonl files. PubMed Abstract (PMA) files were downloaded in xml. The BeautifulSoup library was used to extract the abstract, title, and PMID. All files were stored in jsonl format.")
|
651 |
H4("Filtering"),
|
652 |
P("1. Multiple filters are used here after manually verifying output of all the filters as suggested by peS2o dataset."),
|
653 |
Ol(
|
|
|
647 |
Section(
|
648 |
Div(
|
649 |
H3("PubMed Central and PubMed Abstract"),
|
650 |
+
P(B("Download and Extraction: "), "All files were downloaded from", A("ttps://ftp.ncbi.nlm.nih.gov/pub/pmc/oa_package/",href="ttps://ftp.ncbi.nlm.nih.gov/pub/pmc/oa_package/"),". PubMed Central (PMC) files are downloaded in an xml.tar format. The tar files are opened and converted to markdown format using pandoc", D_code("pandoc -f jats {nxml} -o {pmcid}.md", language="bash"),". The markdown files are combined to create jsonl files. PubMed Abstract (PMA) files were downloaded in xml. The BeautifulSoup library was used to extract the abstract, title, and PMID. All files were stored in jsonl format."),
|
651 |
H4("Filtering"),
|
652 |
P("1. Multiple filters are used here after manually verifying output of all the filters as suggested by peS2o dataset."),
|
653 |
Ol(
|