File size: 1,504 Bytes
e84d35a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
"""Test loadtext."""
# pylint: disable=invalid-name
import pytest

from fastlid import fastlid

from radiobee.loadtext import loadtext
from radiobee.files2df import files2df
from radiobee.file2text import file2text
from radiobee.lists2cmat import lists2cmat
from radiobee.cmat2tset import cmat2tset
from radiobee.gen_pset import gen_pset

# Sample texts shared by the tests below, loaded at import time in this order.
en, zh, testen, testzh = (
    loadtext(path)
    for path in (
        "data/en.txt",
        "data/zh.txt",
        "data/testen.txt",
        "data/testzh.txt",
    )
)


def test_en_zh_short1():
    """Check that gen_pset yields more than two entries for en/zh.

    The non-blank lines of the module-level ``en`` and ``zh`` texts are fed
    through ``lists2cmat`` and the result is passed to ``gen_pset``.
    """
    lst1 = [elm for elm in en.splitlines() if elm.strip()]
    lst2 = [elm for elm in zh.splitlines() if elm.strip()]

    # Run language detection as a smoke check only; nothing is asserted on
    # its result, so it is not bound to a local.
    fastlid(en)
    fastlid(zh)

    cmat0 = lists2cmat(lst1, lst2)
    pset = gen_pset(cmat0)

    assert len(pset) > 2


def test_en_zh_short2():
    """Check that gen_pset yields more than two entries for testen/testzh.

    The non-blank lines of the module-level ``testen`` and ``testzh`` texts
    are fed through ``lists2cmat`` and the result is passed to ``gen_pset``.
    """
    lst1a = [elm for elm in testen.splitlines() if elm.strip()]
    lst2a = [elm for elm in testzh.splitlines() if elm.strip()]

    # Run language detection as a smoke check only; nothing is asserted on
    # its result, so it is not bound to a local.
    fastlid(testen)
    fastlid(testzh)

    cmat1 = lists2cmat(lst1a, lst2a)
    pset = gen_pset(cmat1)

    assert len(pset) > 2


# Disabled scratch code (plotting imports/setup and a DataFrame built via
# cmat2tset), parked inside a throwaway string so that it is never executed.
# NOTE(review): it refers to `cmat`, which is not defined at module level in
# the visible part of this file, so it would not run as-is if re-enabled.
_ = """
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

sns.set()
sns.set_style("darkgrid")
cmap = "viridis_r"
plt.ion()

eps = 6
min_samples = 10


tset = pd.DataFrame(cmat2tset(cmat))
tset.columns = ["x", "y", "cos"]

df_ = tset

# """