File size: 1,890 Bytes
e84d35a
 
 
 
 
 
7b6ee4d
e84d35a
 
 
 
 
7b6ee4d
 
e84d35a
 
 
 
7b6ee4d
e84d35a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7b6ee4d
e84d35a
 
 
7b6ee4d
e84d35a
 
 
 
 
 
 
7b6ee4d
e84d35a
 
 
7b6ee4d
e84d35a
 
7b6ee4d
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
"""Test text2lists."""
from pathlib import Path
from radiobee.loadtext import loadtext
from radiobee.text2lists import text2lists


def test_text2lists_dual1():
    """Test text2lists on data/test-dual.txt loaded through loadtext.

    Checks three things about the two returned lists:
    the second list starts with an empty string, the first entry of the
    first list contains "国际", and the sixth entry of the second list
    contains "2021".
    """
    # Forward slashes resolve on both Windows and POSIX; a backslash
    # separator only works on Windows (on POSIX it is part of the file name).
    filename = "data/test-dual.txt"
    text = loadtext(filename)  # noqa
    l1, l2 = text2lists(text)
    assert l2[0] == ""
    assert "国际" in l1[0]
    assert "2021" in l2[5]


def test_shakespeare1000():
    r"""Run text2lists over the 4000-line Shakespeare zh-en sample.

    Despite the test name, the file exercised here is
    data/shakespeare-zh-en-4000.txt. No content assertions are made: the
    test passes when the file can be read and the result of text2lists
    unpacks into exactly two values.

    Recipe kept for reference -- how the first ``numb_lines`` non-blank
    lines were separated from the zipped source text::

        from pathlib import Path
        import zipfile
        dir_loc = r""
        filename = r"莎士比亚 - 莎士比亚全集(套装共39本 英汉双语)-外语教学与研究出版社 (2016).txt.zip"
        zfile = zipfile.ZipFile(Path(dir_loc) / filename)
        res_bytes = zfile.read(zfile.infolist()[0])
        encoding = cchardet.detect(res_bytes).get("encoding")

        text1000 = []
        line = 0
        numb_lines = 4000
        for elm in res_bytes.splitlines():
            if elm.decode(encoding).strip():
                text1000.append(elm.decode(encoding))
                if line >= numb_lines - 1:
                    break
                line += 1
        Path(f"data/shakespeare-zh-en-{numb_lines}.txt").write_text("\n".join(text1000), encoding="utf8")

    The original notes also carried this plotting snippet (``cmat`` is not
    defined in this file -- presumably from an interactive session)::

        tset = cmat2test(cmat)
        df = pd.DataFrame(tset).rename(columns=dict(zip(range(0, 3), ['x', 'y', 'cos'])))
        plot_df(df)
    """
    # NOTE: earlier experiments with data/shakespeare-zh-en-1000.txt were
    # disabled; only the 4000-line sample is exercised.
    sample_path = Path("data/shakespeare-zh-en-4000.txt")
    sample_text = sample_path.read_text(encoding="utf8")

    # The unpacking is the only check: it raises unless exactly two values
    # come back from text2lists.
    _left, _right = text2lists(sample_text)


def test_test_dual2():
    """Run text2lists on data/test-dual.txt read directly as UTF-8.

    No content assertions are made; the test passes when the file can be
    read and the result of text2lists unpacks into exactly two values.
    """
    dual_path = Path("data/test-dual.txt")
    dual_text = dual_path.read_text(encoding="utf8")

    # Unpacking raises unless text2lists returns exactly two values.
    _left, _right = text2lists(dual_text)