Spaces:
Sleeping
Sleeping
File size: 1,552 Bytes
93bc8ec 5fd26bb 55de44e 93bc8ec 5fd26bb 55de44e 93bc8ec 5fd26bb 55de44e 5fd26bb 55de44e 5fd26bb 55de44e 93bc8ec 5fd26bb 55de44e 5fd26bb 55de44e 93bc8ec 55de44e 93bc8ec 55de44e 93404c2 55de44e 93bc8ec 55de44e 93bc8ec 55de44e 93404c2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 |
import os
from bs4 import BeautifulSoup
from document_qa.grobid_processors import get_xml_nodes_body, get_xml_nodes_figures, get_xml_nodes_header
from tests.resources import TEST_DATA_PATH
def test_get_xml_nodes_body_paragraphs():
    """Check the body node count for the paragraph-level TEI fixture.

    Parses ``2312.07559.paragraphs.tei.xml`` from ``TEST_DATA_PATH`` with
    BeautifulSoup's ``xml`` parser and asserts that ``get_xml_nodes_body``
    called with ``use_paragraphs=True`` returns a collection of length 70.
    """
    fixture_path = os.path.join(TEST_DATA_PATH, "2312.07559.paragraphs.tei.xml")
    # Explicit encoding so decoding does not depend on the platform's locale
    # default (fixture assumed to be UTF-8 -- TODO confirm).
    with open(fixture_path, 'r', encoding="utf-8") as fo:
        soup = BeautifulSoup(fo, 'xml')

    nodes = get_xml_nodes_body(soup, use_paragraphs=True)

    assert len(nodes) == 70
def test_get_xml_nodes_body_sentences():
    """Check the body node count for the sentence-level TEI fixture.

    Parses ``2312.07559.sentences.tei.xml`` from ``TEST_DATA_PATH`` with
    BeautifulSoup's ``xml`` parser and asserts that ``get_xml_nodes_body``
    called with ``use_paragraphs=False`` returns a collection of length 327.
    """
    fixture_path = os.path.join(TEST_DATA_PATH, "2312.07559.sentences.tei.xml")
    # Explicit encoding so decoding does not depend on the platform's locale
    # default (fixture assumed to be UTF-8 -- TODO confirm).
    with open(fixture_path, 'r', encoding="utf-8") as fo:
        soup = BeautifulSoup(fo, 'xml')

    children = get_xml_nodes_body(soup, use_paragraphs=False)

    assert len(children) == 327
def test_get_xml_nodes_figures():
    """Check the figure node count for the paragraph-level TEI fixture.

    Parses ``2312.07559.paragraphs.tei.xml`` from ``TEST_DATA_PATH`` with
    BeautifulSoup's ``xml`` parser and asserts that ``get_xml_nodes_figures``
    returns a collection of length 13.
    """
    fixture_path = os.path.join(TEST_DATA_PATH, "2312.07559.paragraphs.tei.xml")
    # Explicit encoding so decoding does not depend on the platform's locale
    # default (fixture assumed to be UTF-8 -- TODO confirm).
    with open(fixture_path, 'r', encoding="utf-8") as fo:
        soup = BeautifulSoup(fo, 'xml')

    children = get_xml_nodes_figures(soup)

    assert len(children) == 13
def test_get_xml_nodes_header_paragraphs():
    """Check the total header node count for the paragraph-level TEI fixture.

    Parses ``2312.07559.paragraphs.tei.xml`` from ``TEST_DATA_PATH`` with
    BeautifulSoup's ``xml`` parser, calls ``get_xml_nodes_header`` with its
    default arguments, and asserts that the lengths of all values in the
    returned mapping add up to 8.
    """
    fixture_path = os.path.join(TEST_DATA_PATH, "2312.07559.paragraphs.tei.xml")
    # Explicit encoding so decoding does not depend on the platform's locale
    # default (fixture assumed to be UTF-8 -- TODO confirm).
    with open(fixture_path, 'r', encoding="utf-8") as fo:
        soup = BeautifulSoup(fo, 'xml')

    children = get_xml_nodes_header(soup)

    # Only the values matter for the total; the keys are not inspected.
    assert sum(len(group) for group in children.values()) == 8
def test_get_xml_nodes_header_sentences():
    """Check the total header node count for the sentence-level TEI fixture.

    Parses ``2312.07559.sentences.tei.xml`` from ``TEST_DATA_PATH`` with
    BeautifulSoup's ``xml`` parser, calls ``get_xml_nodes_header`` with
    ``use_paragraphs=False``, and asserts that the lengths of all values in
    the returned mapping add up to 15.
    """
    fixture_path = os.path.join(TEST_DATA_PATH, "2312.07559.sentences.tei.xml")
    # Explicit encoding so decoding does not depend on the platform's locale
    # default (fixture assumed to be UTF-8 -- TODO confirm).
    with open(fixture_path, 'r', encoding="utf-8") as fo:
        soup = BeautifulSoup(fo, 'xml')

    children = get_xml_nodes_header(soup, use_paragraphs=False)

    # Only the values matter for the total; the keys are not inspected.
    assert sum(len(group) for group in children.values()) == 15
|