File size: 802 Bytes
e84d35a
 
 
 
 
 
 
 
 
 
 
7b6ee4d
e84d35a
 
 
7b6ee4d
e84d35a
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
"""gen tokens for english or chinese text for a given model."""
# pylint: disable=

from typing import List, Union

from textacy.representations import Vectorizer
from radiobee.insert_spaces import insert_spaces
# from radiobee.gen_model import gen_model


def gen_vector(text: Union[str, List[str]], model: Vectorizer) -> List[float]:
    r"""Transform text into a vector with the given model.

    A plain string is first run through ``insert_spaces`` and split on
    whitespace. Any other input is handed to the model untouched, on the
    assumption that it already consists of the same kind of tokens that
    were used to build the model.

    Args:
        text: string of Chinese chars or English words, or a list of
            ready-made tokens.
        model: vectorizer whose ``transform`` method produces the result.

    Returns:
        Whatever ``model.transform`` returns for the tokens.
        NOTE(review): the annotation says ``List[float]`` and the tokens
        are passed as a flat list; confirm both against the input and
        return types documented for ``Vectorizer.transform``.

    Example (sketch carried over from the original notes; the helpers
    ``loadtext``, ``text2lists`` and ``gen_model`` live outside this block):
        filename = r"data\test-dual.txt"
        text = loadtext(filename)
        list1, list2 = zip(*text2lists(text))
        model = gen_model(list1)
    """
    # Only raw strings need tokenizing; lists are taken as tokens already.
    tokens = insert_spaces(text).split() if isinstance(text, str) else text
    return model.transform(tokens)