Spaces:
Running
Running
#!/usr/bin/python3 | |
# -*- coding: utf-8 -*- | |
import argparse | |
import asyncio | |
import logging | |
import json | |
import os | |
import platform | |
import time | |
from project_settings import project_path | |
os.environ["NLTK_DATA"] = (project_path / "data/nltk_data").as_posix() | |
os.environ["LTP_DATA_DIR"] = (project_path / "data/pyltp_models/ltp_data_v3.4.0").as_posix() | |
from project_settings import project_path, log_directory | |
import log | |
log.setup(log_directory=log_directory) | |
import gradio as gr | |
from toolbox.os.command import Command | |
from toolbox.named_entity_recognization.named_entity_recognization import ( | |
language_to_engines as ner_language_to_engines, | |
engine_to_tagger as ner_engine_to_tagger, | |
ner | |
) | |
from toolbox.part_of_speech.part_of_speech import ( | |
language_to_engines as pos_language_to_engines, | |
engine_to_tagger as pos_engine_to_tagger, | |
pos_tag | |
) | |
from toolbox.sementic_role_labeling.sementic_role_labeling import ( | |
language_to_engines as srl_language_to_engines, | |
engine_to_tagger as srl_engine_to_tagger, | |
srl | |
) | |
from toolbox.tokenization.tokenization import ( | |
language_to_engines as t_language_to_engines, | |
engine_to_tagger as t_engine_to_tagger, | |
tokenize | |
) | |
main_logger = logging.getLogger("main") | |
def get_args():
    """Parse the command-line options of the demo.

    One ``--<task>_example_json_file`` option is registered per task
    (``ner``, ``pos``, ``srl``, ``token``); each defaults to
    ``<project_path>/<task>_examples.json``.

    Returns:
        The ``argparse.Namespace`` produced by ``parse_args()``.
    """
    parser = argparse.ArgumentParser()

    # Registration order matches the order shown in ``--help``.
    for task in ("ner", "pos", "srl", "token"):
        default_file = (project_path / f"{task}_examples.json").as_posix()
        parser.add_argument(
            f"--{task}_example_json_file",
            default=default_file,
            type=str,
        )

    return parser.parse_args()
def run_ner(text: str, language: str, engine: str) -> str:
    """Run named entity recognition on ``text`` and format the result as text.

    Args:
        text: Input text to analyse.
        language: Language key forwarded to ``ner``.
        engine: Engine key forwarded to ``ner``.

    Returns:
        One ``word/tag`` cell per token, each followed by a tab, then a blank
        line and ``time_cost: <seconds>``. If anything raises, the exception
        type and message are returned instead so the UI can display them.
    """
    try:
        main_logger.info(f"ner started. text: {text}, language: {language}, engine: {engine}")
        begin = time.time()

        # ``ner`` returns three parallel sequences; the middle one is not displayed.
        words, _, ner_tags = ner(text, language, engine)

        result = "".join(
            f"{word}/{ner_tag}\t" for word, ner_tag in zip(words, ner_tags)
        )

        time_cost = time.time() - begin
        result += f"\n\ntime_cost: {round(time_cost, 4)}"
        return result
    except Exception as e:
        # UI boundary: report the error to the user, but keep the traceback in the log.
        main_logger.exception("ner failed")
        return f"{type(e)}\n{str(e)}"
def run_pos_tag(text: str, language: str, engine: str) -> str:
    """Run part-of-speech tagging on ``text`` and format the result as text.

    Args:
        text: Input text to analyse.
        language: Language key forwarded to ``pos_tag``.
        engine: Engine key forwarded to ``pos_tag``.

    Returns:
        One ``word/tag`` cell per token, each followed by a tab, then a blank
        line and ``time_cost: <seconds>``. If anything raises, the exception
        type and message are returned instead so the UI can display them.
    """
    try:
        main_logger.info(f"pos tag started. text: {text}, language: {language}, engine: {engine}")
        begin = time.time()

        words, postags = pos_tag(text, language, engine)

        result = "".join(
            f"{word}/{postag}\t" for word, postag in zip(words, postags)
        )

        time_cost = time.time() - begin
        result += f"\n\ntime_cost: {round(time_cost, 4)}"
        return result
    except Exception as e:
        # UI boundary: report the error to the user, but keep the traceback in the log.
        main_logger.exception("pos tag failed")
        return f"{type(e)}\n{str(e)}"
def run_srl(text: str, language: str, engine: str) -> str:
    """Run semantic role labeling on ``text`` and format the result as text.

    Args:
        text: Input text to analyse.
        language: Language key forwarded to ``srl``.
        engine: Engine key forwarded to ``srl``.

    Returns:
        One line per entry of ``roles``; each line holds ``argument/name``
        cells, each followed by a tab. A blank line and
        ``time_cost: <seconds>`` are appended. If anything raises, the
        exception type and message are returned instead so the UI can
        display them.
    """
    try:
        main_logger.info(f"srl started. text: {text}, language: {language}, engine: {engine}")
        begin = time.time()

        # ``srl`` returns four values; only the words and the roles are displayed.
        words, _, _, roles = srl(text, language, engine)

        rows = []
        for role in roles:
            cells = []
            for r in role:
                # Each item is indexed as (name, (start, end)); ``end`` is used
                # as an inclusive word index, hence the ``+ 1`` in the slice.
                name = r[0]
                start = r[1][0]
                end = r[1][1]
                arg_text = "".join(words[start:end + 1])
                cells.append(f"{arg_text}/{name}\t")
            rows.append("".join(cells) + "\n")
        result = "".join(rows)

        time_cost = time.time() - begin
        result += f"\n\ntime_cost: {round(time_cost, 4)}"
        return result
    except Exception as e:
        # UI boundary: report the error to the user, but keep the traceback in the log.
        main_logger.exception("srl failed")
        return f"{type(e)}\n{str(e)}"
def run_tokenization(text: str, language: str, engine: str) -> str:
    """Tokenize ``text`` and format the tokens as text.

    Args:
        text: Input text to tokenize.
        language: Language key forwarded to ``tokenize``.
        engine: Engine key forwarded to ``tokenize``.

    Returns:
        Every token followed by a tab, then a blank line and
        ``time_cost: <seconds>``. If anything raises, the exception type and
        message are returned instead so the UI can display them.
    """
    try:
        main_logger.info(f"tokenization started. text: {text}, language: {language}, engine: {engine}")
        begin = time.time()

        words = tokenize(text, language, engine)

        result = "".join(f"{word}\t" for word in words)

        time_cost = time.time() - begin
        result += f"\n\ntime_cost: {round(time_cost, 4)}"
        return result
    except Exception as e:
        # UI boundary: report the error to the user, but keep the traceback in the log.
        main_logger.exception("tokenization failed")
        return f"{type(e)}\n{str(e)}"
def shell(cmd: str):
    """Pass ``cmd`` to ``Command.popen`` and return whatever it returns.

    NOTE(review): ``main`` wires this to a free-text box in the web UI and
    serves on 0.0.0.0 when not on Windows, so anyone who can reach the page
    can submit ``cmd``. Presumably ``Command.popen`` executes it as a shell
    command - confirm, and consider removing or protecting this endpoint.
    """
    output = Command.popen(cmd)
    return output
def _load_examples(filename: str) -> list:
    """Read one UTF-8 JSON example file and return its parsed content."""
    with open(filename, "r", encoding="utf-8") as f:
        return json.load(f)


def _build_task_tab(tab_name, button_label, language_to_engines, engine_to_tagger, run_fn, examples):
    """Create one tab with text input, language/engine dropdowns, a run button and examples.

    Must be called inside an active ``gr.Tabs()`` context.

    Args:
        tab_name: Title of the tab.
        button_label: Caption of the button that triggers ``run_fn``.
        language_to_engines: Mapping of language -> container of engines that
            support it; used to filter languages for the selected engine.
        engine_to_tagger: Mapping whose keys are the selectable engines.
        run_fn: Callable ``(text, language, engine) -> str`` run on click.
        examples: Example rows handed to ``gr.Examples``.
    """

    def get_languages_by_engine(engine: str):
        # Restrict the language dropdown to languages the chosen engine supports.
        language_list = [
            language
            for language, engines in language_to_engines.items()
            if engine in engines
        ]
        return gr.Dropdown(
            choices=language_list,
            # Avoid an IndexError when no language matches the engine.
            value=language_list[0] if language_list else None,
            label="language",
        )

    language_choices = list(language_to_engines.keys())
    engine_choices = list(engine_to_tagger.keys())

    with gr.TabItem(tab_name):
        text = gr.Textbox(value="学而时习之,不亦悦乎。", lines=4, max_lines=50, label="text")

        with gr.Row():
            language = gr.Dropdown(
                choices=language_choices,
                value=language_choices[0] if language_choices else None,
                label="language",
            )
            engine = gr.Dropdown(
                choices=engine_choices,
                value=engine_choices[0] if engine_choices else None,
                label="engine",
            )
        engine.change(
            get_languages_by_engine,
            inputs=[engine],
            outputs=[language],
        )

        output = gr.Textbox(lines=4, max_lines=50, label="output")
        button = gr.Button(value=button_label, variant="primary")
        button.click(
            run_fn,
            inputs=[text, language, engine],
            outputs=[output],
        )

        gr.Examples(
            examples=examples,
            inputs=[text, language, engine],
            outputs=[output],
            fn=run_fn,
        )


def main():
    """Build the Gradio demo (pos / srl / ner / tokenization / shell tabs) and serve it.

    Example rows for each task are loaded from the JSON files named by the
    command-line arguments. The app is served on port 7860, bound to
    127.0.0.1 on Windows and 0.0.0.0 elsewhere.
    """
    args = get_args()

    ner_examples = _load_examples(args.ner_example_json_file)
    pos_examples = _load_examples(args.pos_example_json_file)
    srl_examples = _load_examples(args.srl_example_json_file)
    token_examples = _load_examples(args.token_example_json_file)

    # blocks
    with gr.Blocks() as blocks:
        gr.Markdown(value="## 词性标注.")

        with gr.Tabs():
            # Each tab uses its own task's language/engine tables and button caption.
            _build_task_tab(
                "part of speech", "pos_tag",
                pos_language_to_engines, pos_engine_to_tagger,
                run_pos_tag, pos_examples,
            )
            _build_task_tab(
                "srl", "srl",
                srl_language_to_engines, srl_engine_to_tagger,
                run_srl, srl_examples,
            )
            _build_task_tab(
                "ner", "ner",
                ner_language_to_engines, ner_engine_to_tagger,
                run_ner, ner_examples,
            )
            _build_task_tab(
                "tokenization", "tokenize",
                t_language_to_engines, t_engine_to_tagger,
                run_tokenization, token_examples,
            )

            # NOTE(review): this tab forwards arbitrary user text to ``shell``
            # while the server listens on 0.0.0.0 off Windows; consider
            # removing it or restricting access.
            with gr.TabItem("shell"):
                shell_text = gr.Textbox(label="cmd")
                shell_button = gr.Button("run")
                shell_output = gr.Textbox(label="output")

                shell_button.click(shell, inputs=[shell_text], outputs=[shell_output])

    blocks.queue().launch(
        share=False,
        server_name="127.0.0.1" if platform.system() == "Windows" else "0.0.0.0",
        server_port=7860,
    )


if __name__ == "__main__":
    main()