from src.data_reader import load_train_test_df | |
from src.feature_extractors.text_statistics_extractor import \ | |
HandcraftedTextFeatureExtractor | |
from src.spell_checker import SmartSpellChecker | |
def test_text_feature_generation():
    """Check the shape of the handcrafted text feature table.

    Loads the training frame via ``load_train_test_df(is_testing=True)``,
    runs ``HandcraftedTextFeatureExtractor`` (backed by a
    ``SmartSpellChecker``) over its ``full_text`` column, and verifies that
    the result has 42 columns and 5 rows.

    NOTE(review): the 5-row expectation presumably mirrors the size of the
    ``is_testing=True`` sample, and 42 the number of features the extractor
    currently emits -- confirm against the loader/extractor when either
    changes.
    """
    expected_column_count = 42
    expected_row_count = 5

    # Only the training split is needed; the second element is discarded.
    training_frame, _ = load_train_test_df(is_testing=True)

    extractor = HandcraftedTextFeatureExtractor(SmartSpellChecker())
    generated = extractor.generate_features(training_frame.full_text)

    # Column count is checked first, then row count.
    column_count = len(generated.columns)
    assert column_count == expected_column_count

    row_count = len(generated)
    assert row_count == expected_row_count