Spaces:
Running
Running
HoneyTian
commited on
Commit
•
e778824
0
Parent(s):
update
Browse files- .gitattributes +35 -0
- .gitignore +11 -0
- Dockerfile +15 -0
- README.md +11 -0
- examples/tutorial_pyltp/README.md +10 -0
- examples/tutorial_pyltp/parser.py +102 -0
- examples/tutorial_pyltp/part_of_speech.py +53 -0
- examples/tutorial_pyltp/sentence_splitter.py +29 -0
- examples/tutorial_pyltp/tokenization.py +47 -0
- install.sh +84 -0
- log.py +110 -0
- main.py +132 -0
- pos_examples.json +3 -0
- project_settings.py +15 -0
- requirements.txt +3 -0
- toolbox/__init__.py +6 -0
- toolbox/json/__init__.py +6 -0
- toolbox/json/misc.py +63 -0
- toolbox/os/__init__.py +6 -0
- toolbox/os/command.py +59 -0
- toolbox/os/environment.py +114 -0
- toolbox/os/other.py +9 -0
- toolbox/part_of_speech/__init__.py +6 -0
- toolbox/part_of_speech/part_of_speech.py +30 -0
- toolbox/part_of_speech/pyltp_pos_tagger.py +34 -0
.gitattributes
ADDED
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
28 |
+
*.tar filter=lfs diff=lfs merge=lfs -text
|
29 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
30 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
31 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
32 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
33 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
.gitignore
ADDED
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
.git/
|
3 |
+
.idea/
|
4 |
+
|
5 |
+
/data/
|
6 |
+
/dotenv/
|
7 |
+
/logs/
|
8 |
+
/trained_models
|
9 |
+
/temp/
|
10 |
+
|
11 |
+
**/__pycache__/
|
Dockerfile
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
FROM python:3.10

# Unprivileged account the application runs as (uid 1000).
RUN useradd -m -u 1000 user

WORKDIR /app

# WORKDIR creates /app owned by root; hand it to `user` so the app can create
# its logs/ and data/ directories at build time and at runtime.
RUN chown user:user /app

COPY --chown=user ./requirements.txt requirements.txt

# Dependencies are installed system-wide while still root.
RUN pip install --no-cache-dir --upgrade -r requirements.txt

COPY --chown=user . /app

# Drop privileges before downloading models and starting the server: the
# original image created `user` but never switched to it, so everything
# (including the web UI's shell tab) ran as root.
USER user

RUN bash install.sh --stage 0 --stop_stage 2

CMD ["python3", "main.py"]
|
README.md
ADDED
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
title: Part Of Speech
|
3 |
+
emoji: 😻
|
4 |
+
colorFrom: blue
|
5 |
+
colorTo: blue
|
6 |
+
sdk: docker
|
7 |
+
pinned: false
|
8 |
+
license: apache-2.0
|
9 |
+
---
|
10 |
+
|
11 |
+
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
examples/tutorial_pyltp/README.md
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
## pyltp
|
2 |
+
|
3 |
+
```text
|
4 |
+
工程路径:
|
5 |
+
https://github.com/HuangFJ/pyltp
|
6 |
+
|
7 |
+
模型文件:
|
8 |
+
https://ltp.ai/download.html
|
9 |
+
|
10 |
+
```
|
examples/tutorial_pyltp/parser.py
ADDED
@@ -0,0 +1,102 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/python3
|
2 |
+
# -*- coding: utf-8 -*-
|
3 |
+
import argparse
|
4 |
+
import os
|
5 |
+
import time
|
6 |
+
|
7 |
+
from project_settings import project_path
|
8 |
+
|
9 |
+
os.environ['NLTK_DATA'] = (project_path / "data/nltk_data").as_posix()
|
10 |
+
|
11 |
+
from nltk import DependencyGraph
|
12 |
+
from pyltp import Segmentor
|
13 |
+
from pyltp import Parser
|
14 |
+
from pyltp import Postagger
|
15 |
+
from pyltp import SementicRoleLabeller
|
16 |
+
|
17 |
+
|
18 |
+
def get_args():
    """Parse the command-line options of the dependency-parsing demo.

    Returns:
        argparse.Namespace with ``text`` (sentence to analyse) and
        ``ltp_data_dir`` (directory holding the pyltp model files).
    """
    default_model_dir = (project_path / "data/pyltp_models/ltp_data_v3.4.0").as_posix()

    arg_parser = argparse.ArgumentParser()
    # Other sample sentences used during development:
    #   "集中竞价的方式回购公司股份"
    #   ",全中国都是我的"
    #   "可以啊可以"
    #   "我们是免费办理的, 不会收取任何手续费和服务费, 随借随还, 可以留个备用,您看可以吗?"
    #   "。啊不用不用挂断 你这个昨天来过电话 你哪有打不通"
    #   "利息怎么算"
    arg_parser.add_argument("--text", default="元芳你怎么看?我就趴窗口上看呗!", type=str)
    arg_parser.add_argument("--ltp_data_dir", default=default_model_dir, type=str)
    return arg_parser.parse_args()
|
39 |
+
|
40 |
+
|
41 |
+
def main():
    """
    Run dependency parsing on ``--text`` and draw the resulting tree.

    The text is segmented, POS-tagged and parsed with pyltp; the arcs are
    rendered as tab-separated rows, handed to ``nltk.DependencyGraph`` and
    drawn in a window.

    Reference:
    https://www.freesion.com/article/91401299576/
    """
    args = get_args()

    cws_model_path = os.path.join(args.ltp_data_dir, 'cws.model')
    pos_model_path = os.path.join(args.ltp_data_dir, 'pos.model')
    parser_model_path = os.path.join(args.ltp_data_dir, 'parser.model')

    # The semantic-role-labelling model ('pisrl_win.model') was loaded here
    # before but never used or released, so it is no longer loaded.
    segmentor = Segmentor(cws_model_path)
    pos_tagger = Postagger(pos_model_path)
    parser = Parser(parser_model_path)

    try:
        time_begin = time.time()

        words = segmentor.segment(args.text)
        print(list(words))

        postags = pos_tagger.postag(words)
        print(list(postags))

        arcs = parser.parse(words, postags)

        cost = time.time() - time_begin
        print("cost: {}".format(cost))

        rows = []
        for word, postag, arc in zip(words, postags, arcs):
            head = arc[0]
            # A head index of 0 marks the root of the sentence.
            relation = "ROOT" if head == 0 else arc[1]
            rows.append(
                """\t{word}({relation}/{postag})\t{postag}\t{head}\t{relation}\n""".format(
                    word=word,
                    relation=relation,
                    postag=postag,
                    head=head,
                )
            )
        tree_str = "".join(rows)

        print(tree_str)
        conlltree = DependencyGraph(tree_str=tree_str)
        tree = conlltree.tree()
        tree.draw()
    finally:
        # Release the native models even when parsing or drawing fails.
        segmentor.release()
        pos_tagger.release()
        parser.release()
|
99 |
+
|
100 |
+
|
101 |
+
if __name__ == '__main__':
|
102 |
+
main()
|
examples/tutorial_pyltp/part_of_speech.py
ADDED
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/python3
|
2 |
+
# -*- coding: utf-8 -*-
|
3 |
+
"""
|
4 |
+
https://huggingface.co/LTP
|
5 |
+
"""
|
6 |
+
import argparse
|
7 |
+
import os
|
8 |
+
|
9 |
+
from pyltp import Segmentor
|
10 |
+
from pyltp import Postagger
|
11 |
+
|
12 |
+
from project_settings import project_path
|
13 |
+
|
14 |
+
|
15 |
+
def get_args():
    """Parse the command-line options of the POS-tagging demo.

    Returns:
        argparse.Namespace with ``text`` and ``ltp_data_dir``.
    """
    default_model_dir = (project_path / "data/pyltp_models/ltp_data_v3.4.0").as_posix()

    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument("--text", default="元芳你怎么看?我就趴窗口上看呗!", type=str)
    arg_parser.add_argument("--ltp_data_dir", default=default_model_dir, type=str)
    return arg_parser.parse_args()
|
30 |
+
|
31 |
+
|
32 |
+
def main():
    """Segment ``--text``, tag each word's part of speech and print both."""
    args = get_args()

    cws_model_path = os.path.join(args.ltp_data_dir, "cws.model")
    pos_model_path = os.path.join(args.ltp_data_dir, "pos.model")

    segmentor = Segmentor(cws_model_path)
    postagger = Postagger(pos_model_path)

    try:
        words = segmentor.segment(args.text)
        postags = postagger.postag(words)
        print(words)
        print(postags)
    finally:
        # Release the native models even when tagging fails.
        segmentor.release()
        postagger.release()
|
50 |
+
|
51 |
+
|
52 |
+
if __name__ == "__main__":
|
53 |
+
main()
|
examples/tutorial_pyltp/sentence_splitter.py
ADDED
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/python3
|
2 |
+
# -*- coding: utf-8 -*-
|
3 |
+
import argparse
|
4 |
+
|
5 |
+
from pyltp import SentenceSplitter
|
6 |
+
|
7 |
+
|
8 |
+
def get_args():
    """Parse the command-line options of the sentence-splitting demo.

    Returns:
        argparse.Namespace with a single ``text`` attribute.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument("--text", default="元芳你怎么看?我就趴窗口上看呗!", type=str)
    return arg_parser.parse_args()
|
18 |
+
|
19 |
+
|
20 |
+
def main():
    """Split ``--text`` into sentences with pyltp and print the result."""
    cli_args = get_args()
    print(SentenceSplitter.split(cli_args.text))
|
26 |
+
|
27 |
+
|
28 |
+
if __name__ == "__main__":
|
29 |
+
main()
|
examples/tutorial_pyltp/tokenization.py
ADDED
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/python3
|
2 |
+
# -*- coding: utf-8 -*-
|
3 |
+
"""
|
4 |
+
https://huggingface.co/LTP
|
5 |
+
"""
|
6 |
+
import argparse
|
7 |
+
import os
|
8 |
+
|
9 |
+
from pyltp import Segmentor
|
10 |
+
|
11 |
+
from project_settings import project_path
|
12 |
+
|
13 |
+
|
14 |
+
def get_args():
    """Parse the command-line options of the word-segmentation demo.

    Returns:
        argparse.Namespace with ``text`` and ``ltp_data_dir``.
    """
    default_model_dir = (project_path / "data/pyltp_models/ltp_data_v3.4.0").as_posix()

    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument("--text", default="元芳你怎么看?我就趴窗口上看呗!", type=str)
    arg_parser.add_argument("--ltp_data_dir", default=default_model_dir, type=str)
    return arg_parser.parse_args()
|
29 |
+
|
30 |
+
|
31 |
+
def main():
    """Segment ``--text`` into words with pyltp and print them."""
    args = get_args()

    cws_model_path = os.path.join(args.ltp_data_dir, "cws.model")

    segmentor = Segmentor(cws_model_path)
    try:
        words = segmentor.segment(args.text)
        print(words)
    finally:
        # Release the native model even when segmentation fails.
        segmentor.release()
|
44 |
+
|
45 |
+
|
46 |
+
if __name__ == "__main__":
|
47 |
+
main()
|
install.sh
ADDED
@@ -0,0 +1,84 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/env bash

# Download the NLTK tokenizer data (stage 0) and the pyltp models (stage 1)
# into ./data. Stages outside [stage, stop_stage] are skipped.
#
# bash install.sh --stage 0 --stop_stage 2
# bash install.sh --stage 1 --stop_stage 1

verbose=true;
stage=-1
stop_stage=0


# parse options
# Each `--name value` pair overrides the already-declared variable `name`
# (dashes in the option name become underscores); unknown names are rejected.
while true; do
  [ -z "${1:-}" ] && break;  # break if there are no arguments
  case "$1" in
    --*) name=$(echo "$1" | sed s/^--// | sed s/-/_/g);
      # Only plain identifiers may reach the eval / indirect expansion below.
      if ! [[ "${name}" =~ ^[A-Za-z_][A-Za-z0-9_]*$ ]]; then
        echo "$0: invalid option $1" 1>&2
        exit 1;
      fi
      eval '[ -z "${'"$name"'+xxx}" ]' && echo "$0: invalid option $1" 1>&2 && exit 1;

      # Without this check `shift 2` fails and the loop never terminates.
      if [ $# -lt 2 ]; then
        echo "$0: option $1 requires a value" 1>&2
        exit 1;
      fi

      # Current value of the variable named by $name (indirect expansion).
      old_value="${!name}";
      if [ "${old_value}" == "true" ] || [ "${old_value}" == "false" ]; then
        was_bool=true;
      else
        was_bool=false;
      fi

      # Assign without eval so the value is stored verbatim (spaces, quotes
      # and `$(...)` in the value are not re-interpreted by the shell).
      printf -v "${name}" '%s' "$2";

      # Check that Boolean-valued arguments are really Boolean.
      if $was_bool && [[ "$2" != "true" && "$2" != "false" ]]; then
        echo "$0: expected \"true\" or \"false\": $1 $2" 1>&2
        exit 1;
      fi
      shift 2;
      ;;

    *) break;
  esac
done

work_dir="$(pwd)"
data_dir="$(pwd)/data"
nltk_data_dir="${data_dir}/nltk_data"
nltk_data_tokenizers_dir="${nltk_data_dir}/tokenizers"
pyltp_models_dir="${data_dir}/pyltp_models"

mkdir -p "${data_dir}"
mkdir -p "${nltk_data_dir}"
mkdir -p "${nltk_data_tokenizers_dir}"
mkdir -p "${pyltp_models_dir}"

if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
  $verbose && echo "stage 0: download nltk data punkt"
  cd "${nltk_data_tokenizers_dir}" || exit 1;

  # https://www.nltk.org/nltk_data/
  if [ ! -d "punkt" ]; then
    # nltk==3.8.1
    wget -c https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/tokenizers/punkt.zip || exit 1;
    unzip punkt.zip || exit 1;
    rm punkt.zip
  fi

  if [ ! -d "punkt_tab" ]; then
    # nltk==3.8.2
    wget -c https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/tokenizers/punkt_tab.zip || exit 1;
    unzip punkt_tab.zip || exit 1;
    rm punkt_tab.zip
  fi
fi


if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
  $verbose && echo "stage 1: download pyltp models"
  cd "${pyltp_models_dir}" || exit 1;

  # pyltp
  # https://ltp.ai/download.html
  # The application reads data/pyltp_models/ltp_data_v3.4.0/cws.model, so use
  # that file as the "already downloaded" marker.
  if [ ! -e "ltp_data_v3.4.0/cws.model" ]; then
    wget -c http://model.scir.yunfutech.com/model/ltp_data_v3.4.0.zip || exit 1;
    unzip ltp_data_v3.4.0.zip || exit 1;
    rm ltp_data_v3.4.0.zip
  fi

fi
|
log.py
ADDED
@@ -0,0 +1,110 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/python3
|
2 |
+
# -*- coding: utf-8 -*-
|
3 |
+
import logging
|
4 |
+
from logging.handlers import TimedRotatingFileHandler
|
5 |
+
import os
|
6 |
+
|
7 |
+
|
8 |
+
def setup(log_directory: str):
|
9 |
+
fmt = "%(asctime)s - %(name)s - %(levelname)s %(filename)s:%(lineno)d > %(message)s"
|
10 |
+
|
11 |
+
stream_handler = logging.StreamHandler()
|
12 |
+
stream_handler.setLevel(logging.INFO)
|
13 |
+
stream_handler.setFormatter(logging.Formatter(fmt))
|
14 |
+
|
15 |
+
# main
|
16 |
+
main_logger = logging.getLogger("main")
|
17 |
+
main_logger.addHandler(stream_handler)
|
18 |
+
main_info_file_handler = TimedRotatingFileHandler(
|
19 |
+
filename=os.path.join(log_directory, "main.log"),
|
20 |
+
encoding="utf-8",
|
21 |
+
when="midnight",
|
22 |
+
interval=1,
|
23 |
+
backupCount=30
|
24 |
+
)
|
25 |
+
main_info_file_handler.setLevel(logging.INFO)
|
26 |
+
main_info_file_handler.setFormatter(logging.Formatter(fmt))
|
27 |
+
main_logger.addHandler(main_info_file_handler)
|
28 |
+
|
29 |
+
# http
|
30 |
+
http_logger = logging.getLogger("http")
|
31 |
+
http_file_handler = TimedRotatingFileHandler(
|
32 |
+
filename=os.path.join(log_directory, "http.log"),
|
33 |
+
encoding='utf-8',
|
34 |
+
when="midnight",
|
35 |
+
interval=1,
|
36 |
+
backupCount=30
|
37 |
+
)
|
38 |
+
http_file_handler.setLevel(logging.DEBUG)
|
39 |
+
http_file_handler.setFormatter(logging.Formatter(fmt))
|
40 |
+
http_logger.addHandler(http_file_handler)
|
41 |
+
|
42 |
+
# api
|
43 |
+
api_logger = logging.getLogger("api")
|
44 |
+
api_file_handler = TimedRotatingFileHandler(
|
45 |
+
filename=os.path.join(log_directory, "api.log"),
|
46 |
+
encoding='utf-8',
|
47 |
+
when="midnight",
|
48 |
+
interval=1,
|
49 |
+
backupCount=30
|
50 |
+
)
|
51 |
+
api_file_handler.setLevel(logging.DEBUG)
|
52 |
+
api_file_handler.setFormatter(logging.Formatter(fmt))
|
53 |
+
api_logger.addHandler(api_file_handler)
|
54 |
+
|
55 |
+
# alarm
|
56 |
+
alarm_logger = logging.getLogger("alarm")
|
57 |
+
alarm_file_handler = TimedRotatingFileHandler(
|
58 |
+
filename=os.path.join(log_directory, "alarm.log"),
|
59 |
+
encoding="utf-8",
|
60 |
+
when="midnight",
|
61 |
+
interval=1,
|
62 |
+
backupCount=30
|
63 |
+
)
|
64 |
+
alarm_file_handler.setLevel(logging.DEBUG)
|
65 |
+
alarm_file_handler.setFormatter(logging.Formatter(fmt))
|
66 |
+
alarm_logger.addHandler(alarm_file_handler)
|
67 |
+
|
68 |
+
debug_file_handler = TimedRotatingFileHandler(
|
69 |
+
filename=os.path.join(log_directory, "debug.log"),
|
70 |
+
encoding="utf-8",
|
71 |
+
when="D",
|
72 |
+
interval=1,
|
73 |
+
backupCount=7
|
74 |
+
)
|
75 |
+
debug_file_handler.setLevel(logging.DEBUG)
|
76 |
+
debug_file_handler.setFormatter(logging.Formatter(fmt))
|
77 |
+
|
78 |
+
info_file_handler = TimedRotatingFileHandler(
|
79 |
+
filename=os.path.join(log_directory, "info.log"),
|
80 |
+
encoding="utf-8",
|
81 |
+
when="D",
|
82 |
+
interval=1,
|
83 |
+
backupCount=7
|
84 |
+
)
|
85 |
+
info_file_handler.setLevel(logging.INFO)
|
86 |
+
info_file_handler.setFormatter(logging.Formatter(fmt))
|
87 |
+
|
88 |
+
error_file_handler = TimedRotatingFileHandler(
|
89 |
+
filename=os.path.join(log_directory, "error.log"),
|
90 |
+
encoding="utf-8",
|
91 |
+
when="D",
|
92 |
+
interval=1,
|
93 |
+
backupCount=7
|
94 |
+
)
|
95 |
+
error_file_handler.setLevel(logging.ERROR)
|
96 |
+
error_file_handler.setFormatter(logging.Formatter(fmt))
|
97 |
+
|
98 |
+
logging.basicConfig(
|
99 |
+
level=logging.DEBUG,
|
100 |
+
datefmt="%a, %d %b %Y %H:%M:%S",
|
101 |
+
handlers=[
|
102 |
+
debug_file_handler,
|
103 |
+
info_file_handler,
|
104 |
+
error_file_handler,
|
105 |
+
]
|
106 |
+
)
|
107 |
+
|
108 |
+
|
109 |
+
if __name__ == "__main__":
|
110 |
+
pass
|
main.py
ADDED
@@ -0,0 +1,132 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/python3
|
2 |
+
# -*- coding: utf-8 -*-
|
3 |
+
import argparse
|
4 |
+
import asyncio
|
5 |
+
import logging
|
6 |
+
import json
|
7 |
+
import os
|
8 |
+
import platform
|
9 |
+
import time
|
10 |
+
|
11 |
+
from project_settings import project_path
|
12 |
+
|
13 |
+
os.environ["NLTK_DATA"] = (project_path / "data/nltk_data").as_posix()
|
14 |
+
os.environ["LTP_DATA_DIR"] = (project_path / "data/pyltp_models/ltp_data_v3.4.0").as_posix()
|
15 |
+
|
16 |
+
from project_settings import project_path, log_directory
|
17 |
+
import log
|
18 |
+
|
19 |
+
log.setup(log_directory=log_directory)
|
20 |
+
|
21 |
+
import gradio as gr
|
22 |
+
|
23 |
+
from toolbox.os.command import Command
|
24 |
+
from toolbox.part_of_speech.part_of_speech import language_to_engines, engine_to_tagger, pos_parser
|
25 |
+
|
26 |
+
main_logger = logging.getLogger("main")
|
27 |
+
|
28 |
+
|
29 |
+
def get_args():
    """Parse the command-line options of the web application.

    Returns:
        argparse.Namespace with ``pos_example_json_file``, the path of the
        JSON file holding the example rows shown in the UI.
    """
    default_examples = (project_path / "pos_examples.json").as_posix()

    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument("--pos_example_json_file", default=default_examples, type=str)
    return arg_parser.parse_args()
|
39 |
+
|
40 |
+
|
41 |
+
def pos_tag(text: str, language: str, engine: str) -> str:
    """Tag ``text`` and format the result for the output textbox.

    Args:
        text: text to segment and tag.
        language: language selected in the UI.
        engine: tagging engine selected in the UI.

    Returns:
        ``word/tag`` pairs, each followed by a tab, then a blank line and the
        elapsed time. If tagging fails, the exception type and message are
        returned instead so the error is visible in the UI.
    """
    begin = time.time()

    try:
        # pos_parser takes (text, language, engine); the language used to be
        # omitted, so the engine was passed as the language and the selected
        # engine was ignored.
        words, postags = pos_parser(text, language, engine)
        result = "".join(f"{word}/{postag}\t" for word, postag in zip(words, postags))

        time_cost = time.time() - begin
        result += f"\n\ntime_cost: {round(time_cost, 4)}"
        return result
    except Exception as e:
        # UI boundary: report the failure to the user instead of raising.
        result = f"{type(e)}\n{str(e)}"
        return result
|
57 |
+
|
58 |
+
|
59 |
+
def shell(cmd: str):
    """Run ``cmd`` through ``Command.popen`` and return whatever it returns.

    NOTE(review): this is the click handler of the "shell" tab, and the app
    is served on 0.0.0.0 outside Windows, so any visitor can execute
    arbitrary commands on the host. Confirm this is intended before deploying.
    """
    return Command.popen(cmd)
|
61 |
+
|
62 |
+
|
63 |
+
def main():
    """Build the Gradio UI (POS tagging tab and shell tab) and serve it."""
    args = get_args()

    with open(args.pos_example_json_file, "r", encoding="utf-8") as f:
        pos_examples: list = json.load(f)

    def get_languages_by_engine(engine: str):
        """Return a language dropdown limited to what ``engine`` supports."""
        language_list = [
            language
            for language, engines in language_to_engines.items()
            if engine in engines
        ]
        # An engine that no language maps to used to raise IndexError here.
        value = language_list[0] if language_list else None
        return gr.Dropdown(choices=language_list, value=value, label="language")

    pos_language_choices = list(language_to_engines.keys())
    pos_engine_choices = list(engine_to_tagger.keys())

    # blocks
    with gr.Blocks() as blocks:
        gr.Markdown(value="## 词性标注.")

        with gr.Tabs():
            with gr.TabItem("part of speech"):
                pos_text = gr.Textbox(value="学而时习之,不亦悦乎。", lines=4, max_lines=50, label="text")

                with gr.Row():
                    pos_language = gr.Dropdown(
                        choices=pos_language_choices, value=pos_language_choices[0],
                        label="language"
                    )
                    pos_engine = gr.Dropdown(
                        choices=pos_engine_choices, value=pos_engine_choices[0],
                        label="engine"
                    )

                # Changing the engine refreshes the list of selectable languages.
                pos_engine.change(
                    get_languages_by_engine,
                    inputs=[pos_engine],
                    outputs=[pos_language],
                )
                pos_output = gr.Textbox(lines=4, max_lines=50, label="output")
                pos_button = gr.Button(value="pos_tag", variant="primary")
                pos_button.click(
                    pos_tag,
                    inputs=[pos_text, pos_language, pos_engine],
                    outputs=[pos_output],
                )

                gr.Examples(
                    examples=pos_examples,
                    inputs=[pos_text, pos_language, pos_engine],
                    outputs=[pos_output],
                    fn=pos_tag,
                )

            # NOTE(review): this tab runs arbitrary commands on the host.
            with gr.TabItem("shell"):
                shell_text = gr.Textbox(label="cmd")
                shell_button = gr.Button("run")
                shell_output = gr.Textbox(label="output")
                shell_button.click(shell, inputs=[shell_text,], outputs=[shell_output])

    blocks.queue().launch(
        # Both branches of the old conditional were False.
        share=False,
        server_name="127.0.0.1" if platform.system() == "Windows" else "0.0.0.0",
        server_port=7860,
    )
    return
|
129 |
+
|
130 |
+
|
131 |
+
if __name__ == "__main__":
|
132 |
+
main()
|
pos_examples.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
[
|
2 |
+
["元芳你怎么看?我就趴窗口上看呗!", "chinese", "pyltp"]
|
3 |
+
]
|
project_settings.py
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/python3
|
2 |
+
# -*- coding: utf-8 -*-
|
3 |
+
import os
|
4 |
+
from pathlib import Path
|
5 |
+
|
6 |
+
|
7 |
+
# Absolute directory containing this settings module; the project root.
project_path = Path(os.path.abspath(os.path.dirname(__file__)))

# Directory for log files; created on import if it does not exist yet.
log_directory = project_path / "logs"
log_directory.mkdir(parents=True, exist_ok=True)
|
12 |
+
|
13 |
+
|
14 |
+
if __name__ == '__main__':
|
15 |
+
pass
|
requirements.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
gradio
|
2 |
+
nltk==3.8.1
|
3 |
+
pyltp==0.4.0
|
toolbox/__init__.py
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/python3
|
2 |
+
# -*- coding: utf-8 -*-
|
3 |
+
|
4 |
+
|
5 |
+
if __name__ == "__main__":
|
6 |
+
pass
|
toolbox/json/__init__.py
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/python3
|
2 |
+
# -*- coding: utf-8 -*-
|
3 |
+
|
4 |
+
|
5 |
+
if __name__ == '__main__':
|
6 |
+
pass
|
toolbox/json/misc.py
ADDED
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/python3
|
2 |
+
# -*- coding: utf-8 -*-
|
3 |
+
from typing import Callable
|
4 |
+
|
5 |
+
|
6 |
+
def traverse(js, callback: Callable, *args, **kwargs):
    """Rebuild a nested structure, passing every int/str leaf to ``callback``.

    Lists, tuples and dicts are rebuilt recursively (dict keys are traversed
    as well as values). ``int`` and ``str`` leaves are replaced by
    ``callback(leaf, *args, **kwargs)``; any other value is returned as is.

    Args:
        js: the structure (or leaf value) to walk.
        callback: function applied to each int/str leaf.
        *args, **kwargs: forwarded to every ``callback`` call.

    Returns:
        A new structure of the same shape with the leaves transformed.
    """
    def walk(node):
        return traverse(node, callback, *args, **kwargs)

    if isinstance(js, list):
        return [walk(item) for item in js]

    if isinstance(js, tuple):
        return tuple([walk(item) for item in js])

    if isinstance(js, dict):
        rebuilt = dict()
        for key, value in js.items():
            # Key first, then value, so callbacks fire in the same order.
            new_key = walk(key)
            new_value = walk(value)
            rebuilt[new_key] = new_value
        return rebuilt

    if isinstance(js, (int, str)):
        return callback(js, *args, **kwargs)

    return js
|
32 |
+
|
33 |
+
|
34 |
+
def demo1():
    """Show ``traverse`` stripping the leading ``$`` from string leaves.

    The sample configuration holds placeholder values only. The real
    passwords and internal host that were committed here have been removed;
    never put real credentials in source code.
    """
    d = {
        "env": "ppe",
        "mysql_connect": {
            "host": "$mysql_connect_host",
            "port": 3306,
            "user": "example_user",
            "password": "<mysql-password>",
            "database": "example_db",
            "charset": "utf8"
        },
        "es_connect": {
            "hosts": ["127.0.0.1"],
            "http_auth": ["example_user", "<es-password>"],
            "port": 9200
        }
    }

    def callback(s):
        if isinstance(s, str) and s.startswith('$'):
            return s[1:]
        return s

    result = traverse(d, callback=callback)
    print(result)
    return
|
60 |
+
|
61 |
+
|
62 |
+
if __name__ == '__main__':
|
63 |
+
demo1()
|
toolbox/os/__init__.py
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/python3
|
2 |
+
# -*- coding: utf-8 -*-
|
3 |
+
|
4 |
+
|
5 |
+
if __name__ == '__main__':
|
6 |
+
pass
|
toolbox/os/command.py
ADDED
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/python3
|
2 |
+
# -*- coding: utf-8 -*-
|
3 |
+
import os
|
4 |
+
|
5 |
+
|
6 |
+
class Command(object):
    """Run shell commands, handling a few commands in-process.

    Commands named in ``custom_command`` are dispatched to the class method
    of the same name instead of a sub-shell (``cd`` must change the working
    directory of this process, which a child shell cannot do).
    """
    custom_command = [
        "cd"
    ]

    @staticmethod
    def _get_cmd(command):
        """Split ``command`` into ``(name, argument_string)``.

        Returns:
            ``None`` for a blank command, otherwise a tuple of the first
            whitespace-delimited word and the remainder ("" when absent).
        """
        command = str(command).strip()
        if command == "":
            return None
        # maxsplit=1 keeps the argument string intact and tolerates any run
        # of whitespace between the command name and its arguments.
        cmd, *rest = command.split(maxsplit=1)
        args = rest[0] if rest else ""
        return cmd, args

    @classmethod
    def popen(cls, command):
        """Execute ``command`` and return its standard output.

        Returns:
            The command's stdout as a string; "" for a blank command; the
            result of the handler for commands in ``custom_command``.
        """
        parsed = cls._get_cmd(command)
        if parsed is None:
            # A blank command used to crash when unpacking None.
            return ""

        cmd, args = parsed
        if cmd in cls.custom_command:
            method = getattr(cls, cmd)
            return method(args)

        # The context manager closes the pipe even if read() raises.
        with os.popen(command) as resp:
            return resp.read()

    @classmethod
    def cd(cls, args):
        """Change the working directory of the current process to ``args``."""
        # os.path.join returns ``args`` unchanged when it is absolute, so one
        # call covers both the absolute and the relative case.
        os.chdir(os.path.join(os.getcwd(), args))

    @classmethod
    def system(cls, command):
        """Run ``command`` with ``os.system`` and return its exit status."""
        return os.system(command)

    def __init__(self):
        pass
|
48 |
+
|
49 |
+
|
50 |
+
def ps_ef_grep(keyword: str):
    """Return the ``ps -ef`` rows that contain ``keyword``.

    Args:
        keyword: literal text to look for in the process list.

    Returns:
        List of matching output rows, excluding rows that contain "grep".
    """
    import shlex

    # Quote the keyword so spaces or shell metacharacters cannot alter the
    # pipeline; -F matches it literally, like the filter below, and `--`
    # stops a leading "-" from being read as an option.
    cmd = "ps -ef | grep -F -- {}".format(shlex.quote(keyword))
    rows = str(Command.popen(cmd)).split("\n")
    return [row for row in rows if keyword in row and "grep" not in row]
|
56 |
+
|
57 |
+
|
58 |
+
if __name__ == "__main__":
|
59 |
+
pass
|
toolbox/os/environment.py
ADDED
@@ -0,0 +1,114 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/python3
|
2 |
+
# -*- coding: utf-8 -*-
|
3 |
+
import json
|
4 |
+
import os
|
5 |
+
|
6 |
+
from dotenv import load_dotenv
|
7 |
+
from dotenv.main import DotEnv
|
8 |
+
|
9 |
+
from toolbox.json.misc import traverse
|
10 |
+
|
11 |
+
|
12 |
+
class EnvironmentManager(object):
    """Load ``<path>/<env>.env`` into the process environment and read it.

    Every value fetched through ``get`` is also recorded in ``_environ``.
    """
    def __init__(self, path, env, override=False):
        """
        Args:
            path: directory that contains the dotenv files.
            env: environment name; the file loaded is ``<env>.env``.
            override: forwarded to ``load_dotenv``.
        """
        self.filename = os.path.join(path, '{}.env'.format(env))
        load_dotenv(dotenv_path=self.filename, override=override)

        # Record of every key looked up through get() and the value returned.
        self._environ = dict()

    def open_dotenv(self, filename: str = None):
        """Parse a dotenv file (default: the one given at construction).

        Returns:
            The mapping produced by ``DotEnv(...).dict()``.
        """
        parsed = DotEnv(
            dotenv_path=filename or self.filename,
            stream=None,
            verbose=False,
            interpolate=False,
            override=False,
            encoding="utf-8",
        )
        return parsed.dict()

    def get(self, key, default=None, dtype=str):
        """Read ``key`` from ``os.environ``.

        Args:
            key: environment variable name.
            default: returned as is (not converted) when the key is unset.
            dtype: callable applied to the raw string when the key is set.

        Returns:
            The converted value, or ``default`` when the key is unset.
        """
        raw = os.environ.get(key)
        value = default if raw is None else dtype(raw)
        self._environ[key] = value
        return value
|
48 |
+
|
49 |
+
|
50 |
+
# Converter names accepted in `$<dtype>:<key>` references, mapped to the
# callable that converts the raw environment string.
_DEFAULT_DTYPE_MAP = {
    'int': int,
    'float': float,
    'str': str,
    'json.loads': json.loads
}


class JsonConfig(object):
    """
    Resolve environment references inside a JSON document.

    A string value of the form `$float:threshold` is replaced by the value
    of `threshold` looked up in the environment and converted to float.
    """
    def __init__(self, dtype_map: dict = None, environment: "EnvironmentManager" = None):
        """
        Args:
            dtype_map: converter name -> callable; defaults to
                ``_DEFAULT_DTYPE_MAP``.
            environment: object with a ``get(key)`` method; defaults to
                ``os.environ``.
        """
        self.dtype_map = dtype_map or _DEFAULT_DTYPE_MAP
        self.environment = environment or os.environ

    def sanitize_by_filename(self, filename: str):
        """Load the JSON file ``filename`` and resolve its references."""
        with open(filename, 'r', encoding='utf-8') as f:
            js = json.load(f)

        return self.sanitize_by_json(js)

    def sanitize_by_json(self, js):
        """Return a copy of ``js`` with every reference resolved."""
        js = traverse(
            js,
            callback=self.sanitize,
            environment=self.environment
        )
        return js

    def sanitize(self, string, environment):
        """Resolve one value; supports references that start with `$`.

        Args:
            string: the leaf value; anything that is not a `$`-prefixed
                string is returned unchanged.
            environment: object with a ``get(key)`` method.

        Returns:
            The converted environment value, or ``string`` itself.

        Raises:
            ValueError: the reference has no `:` separator.
            KeyError: the dtype name is not in ``dtype_map``.
            AssertionError: the key is missing from the environment.
        """
        if not (isinstance(string, str) and string.startswith('$')):
            return string

        # Split on the first ':' only, so the key itself may contain colons,
        # and report a malformed reference instead of an opaque unpack error.
        dtype_name, separator, key = string[1:].partition(':')
        if not separator:
            raise ValueError(
                "invalid environment reference {!r}; expected '$<dtype>:<key>'".format(string)
            )

        try:
            dtype = self.dtype_map[dtype_name]
        except KeyError:
            raise KeyError(
                'unsupported dtype {!r} in {!r}; supported: {}'.format(
                    dtype_name, string, sorted(self.dtype_map)
                )
            ) from None

        value = environment.get(key)
        if value is None:
            raise AssertionError('environment not exist. key: {}'.format(key))

        return dtype(value)
|
96 |
+
|
97 |
+
|
98 |
+
def demo1():
    """Load the ``dev`` dotenv file and print the ``init_scenes`` setting.

    NOTE(review): the dotenv directory 'server/callbot_server/dotenv' is not
    among the files added by this commit; confirm the path for this project.
    """
    import json

    from project_settings import project_path

    dotenv_dir = os.path.join(project_path, 'server/callbot_server/dotenv')
    environment = EnvironmentManager(path=dotenv_dir, env='dev')

    init_scenes = environment.get(key='init_scenes', dtype=json.loads)
    print(init_scenes)
    print(environment._environ)
|
111 |
+
|
112 |
+
|
113 |
+
if __name__ == '__main__':
|
114 |
+
demo1()
|
toolbox/os/other.py
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import inspect
|
3 |
+
|
4 |
+
|
5 |
+
def pwd():
    """Return the directory of the file from which this function is called.

    Returns:
        Absolute path of the directory containing the caller's source file.
    """
    # Index 1 is the caller's frame. Its filename is used directly because
    # inspect.getmodule() can return None (no attribute to read), and
    # context=0 skips reading source lines for every frame on the stack.
    caller = inspect.stack(context=0)[1]
    return os.path.dirname(os.path.abspath(caller.filename))
|
toolbox/part_of_speech/__init__.py
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/python3
|
2 |
+
# -*- coding: utf-8 -*-
|
3 |
+
|
4 |
+
|
5 |
+
if __name__ == "__main__":
|
6 |
+
pass
|
toolbox/part_of_speech/part_of_speech.py
ADDED
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/python3
|
2 |
+
# -*- coding: utf-8 -*-
|
3 |
+
from typing import Callable, Dict, List, Tuple, Union
|
4 |
+
|
5 |
+
from toolbox.part_of_speech.pyltp_pos_tagger import pyltp_pos_tagger
|
6 |
+
|
7 |
+
|
8 |
+
language_to_engines = {
|
9 |
+
"chinese": ["pyltp"]
|
10 |
+
}
|
11 |
+
|
12 |
+
|
13 |
+
engine_to_tagger: Dict[str, Callable] = {
|
14 |
+
"pyltp": pyltp_pos_tagger
|
15 |
+
}
|
16 |
+
|
17 |
+
|
18 |
+
def pos_parser(text: str, language: str, engine: str = "pyltp") -> Tuple[List[str], List[str]]:
    """Segment ``text`` and tag each word with the selected engine.

    Args:
        text: text to analyse.
        language: language of ``text``; passed through to the tagger.
        engine: key of ``engine_to_tagger`` naming the tagger to use.

    Returns:
        ``(words, postags)``, two sequences of equal length.

    Raises:
        AssertionError: ``engine`` is unknown, or the tagger returned a
            different number of tags than words.
    """
    pos_tagger = engine_to_tagger.get(engine)
    if pos_tagger is None:
        raise AssertionError(f"engine {engine} not supported.")

    words, postags = pos_tagger(text, language)
    if len(words) != len(postags):
        raise AssertionError(
            f"engine {engine} returned {len(words)} words but {len(postags)} tags."
        )
    return words, postags
|
27 |
+
|
28 |
+
|
29 |
+
if __name__ == "__main__":
|
30 |
+
pass
|
toolbox/part_of_speech/pyltp_pos_tagger.py
ADDED
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/python3
|
2 |
+
# -*- coding: utf-8 -*-
|
3 |
+
from functools import lru_cache
|
4 |
+
import os
|
5 |
+
from typing import List, Union
|
6 |
+
|
7 |
+
ltp_data_dir = os.environ.get("LTP_DATA_DIR")
|
8 |
+
|
9 |
+
from pyltp import Postagger, Segmentor
|
10 |
+
|
11 |
+
|
12 |
+
@lru_cache(maxsize=5)
def get_pyltp_pos_tagger():
    """Load the pyltp segmentation and POS models (cached after first call).

    Returns:
        ``(segmentor, postagger)`` built from ``cws.model`` and ``pos.model``
        in the LTP data directory.

    Raises:
        RuntimeError: the ``LTP_DATA_DIR`` environment variable is not set.
    """
    # Prefer the value captured at import time; fall back to the current
    # environment in case the variable was set after this module was imported.
    data_dir = ltp_data_dir or os.environ.get("LTP_DATA_DIR")
    if not data_dir:
        # Without this check os.path.join(None, ...) raises an opaque TypeError.
        raise RuntimeError(
            "environment variable LTP_DATA_DIR is not set; "
            "it must point to the directory containing cws.model and pos.model."
        )

    cws_model_path = os.path.join(data_dir, "cws.model")
    pos_model_path = os.path.join(data_dir, "pos.model")

    segmentor = Segmentor(cws_model_path)
    postagger = Postagger(pos_model_path)

    return segmentor, postagger
|
23 |
+
|
24 |
+
|
25 |
+
def pyltp_pos_tagger(text: str, language: str) -> Union[List[str], List[str]]:
    """Segment ``text`` with pyltp and tag each resulting word.

    Args:
        text: text to analyse.
        language: accepted for a uniform tagger signature; not used here.

    Returns:
        The pair ``(words, postags)`` as produced by pyltp.

    NOTE(review): the annotation says ``Union`` but a 2-tuple is returned;
    ``Tuple`` is not imported in this module, so it is left unchanged.
    """
    segmentor, postagger = get_pyltp_pos_tagger()

    words = segmentor.segment(text)
    return words, postagger.postag(words)
|
31 |
+
|
32 |
+
|
33 |
+
if __name__ == "__main__":
|
34 |
+
pass
|