Spaces:
Sleeping
Sleeping
from document_qa.document_qa_engine import TextMerger | |
def test_merge_passages_small_chunk():
    """Check that a small chunk size leaves every passage unmerged.

    With ``chunk_size=10`` and ``tolerance=0`` the test expects
    ``merge_passages`` to return the four input passages one-to-one, each
    keeping its original text and coordinates.
    """
    merger = TextMerger()

    # NOTE(review): chunk_size is presumably a token/word budget -- confirm
    # against TextMerger.merge_passages.
    passages = [
        {
            'text': "The quick brown fox jumps over the tree",
            'coordinates': '1',
        },
        {
            'text': "and went straight into the mouth of a bear.",
            'coordinates': '2',
        },
        {
            'text': "The color of the colors is a color with colors",
            'coordinates': '3',
        },
        {
            # NOTE(review): "colorw" looks like a typo; it is kept because the
            # expected output below must match this input exactly.
            'text': "the main colors are not the colorw we show",
            'coordinates': '4',
        },
    ]

    new_passages = merger.merge_passages(passages, chunk_size=10, tolerance=0)

    # Nothing is merged: same count, same order, same content.
    assert len(new_passages) == 4

    assert new_passages[0]['coordinates'] == "1"
    assert new_passages[0]['text'] == "The quick brown fox jumps over the tree"

    assert new_passages[1]['coordinates'] == "2"
    assert new_passages[1]['text'] == "and went straight into the mouth of a bear."

    assert new_passages[2]['coordinates'] == "3"
    assert new_passages[2]['text'] == "The color of the colors is a color with colors"

    assert new_passages[3]['coordinates'] == "4"
    assert new_passages[3]['text'] == "the main colors are not the colorw we show"
def test_merge_passages_big_chunk():
    """Check that a larger chunk size merges the passages pairwise.

    With ``chunk_size=20`` and ``tolerance=0`` the test expects
    ``merge_passages`` to collapse the four input passages into two: the
    texts of each pair joined by a single space, and their coordinates
    joined by ``;``.
    """
    merger = TextMerger()

    # NOTE(review): chunk_size is presumably a token/word budget -- confirm
    # against TextMerger.merge_passages.
    passages = [
        {
            'text': "The quick brown fox jumps over the tree",
            'coordinates': '1',
        },
        {
            'text': "and went straight into the mouth of a bear.",
            'coordinates': '2',
        },
        {
            'text': "The color of the colors is a color with colors",
            'coordinates': '3',
        },
        {
            # NOTE(review): "colorw" looks like a typo; it is kept because the
            # expected output below must match this input exactly.
            'text': "the main colors are not the colorw we show",
            'coordinates': '4',
        },
    ]

    new_passages = merger.merge_passages(passages, chunk_size=20, tolerance=0)

    assert len(new_passages) == 2

    # Passages 1 and 2 are merged into the first chunk.
    assert new_passages[0]['coordinates'] == "1;2"
    assert new_passages[0]['text'] == (
        "The quick brown fox jumps over the tree "
        "and went straight into the mouth of a bear."
    )

    # Passages 3 and 4 are merged into the second chunk.
    assert new_passages[1]['coordinates'] == "3;4"
    assert new_passages[1]['text'] == (
        "The color of the colors is a color with colors "
        "the main colors are not the colorw we show"
    )