File size: 483 Bytes
8b414b0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
from src.data_reader import load_train_test_df
from src.spell_checker import SmartSpellChecker
from src.text_preprocessings.spellcheck_preprocessing import \
    SpellcheckTextPreprocessor


def test_text_cleaning():
    """Check the number of texts returned by the spellcheck preprocessor.

    Loads the train/test frames with ``is_testing=True``, feeds the training
    frame's ``full_text`` column to a ``SpellcheckTextPreprocessor`` built
    around a ``SmartSpellChecker``, and asserts that the result has length 5.
    """
    # Only the first element of the returned pair is used here.
    training_frame, _unused_frame = load_train_test_df(is_testing=True)

    preprocessor = SpellcheckTextPreprocessor(SmartSpellChecker())

    raw_texts = training_frame.full_text
    processed = preprocessor.preprocess_data(raw_texts)

    # NOTE(review): presumably 5 is the row count of the is_testing fixture;
    # confirm against load_train_test_df.
    expected_count = 5
    assert len(processed) == expected_count