Spaces:
Running
Running
File size: 736 Bytes
7884ed6 8778cfe 7884ed6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 |
import re
import sys
import benepar
from huggingface_hub import hf_hub_download
# Hugging Face Hub location of the parser checkpoint used by parse().
_MODEL_REPO_ID = "nielklug/enhg_parser"
_MODEL_FILENAME = 'new-convbert-german-europeana0_dev=83.03.pt'

# Parser instance shared by every parse() call; created on first use so that
# importing this module stays cheap and the checkpoint is only loaded once.
_cached_parser = None


def _get_parser():
    """Return the shared benepar parser, fetching and loading it on first use."""
    global _cached_parser
    if _cached_parser is None:
        model_path = hf_hub_download(
            repo_id=_MODEL_REPO_ID,
            filename=_MODEL_FILENAME,
        )
        _cached_parser = benepar.Parser(model_path)
    return _cached_parser


def parse(words):
    """Parse a tokenized sentence and return its tree as a one-line string.

    Args:
        words: Iterable of token strings making up one sentence. Round
            brackets inside tokens are rewritten to ``-LRB-`` / ``-RRB-``
            before being handed to the parser.

    Returns:
        The bracketed parse tree as a single-line string. Bracket
        placeholders (``-LRB-``, ``-RRB-``, ``-LSB-``, ``-RSB-``) are
        rendered as backslash-escaped brackets, and the space in front of
        every opening parenthesis is removed.
    """
    parser = _get_parser()

    # Literal parentheses would clash with the tree's own bracket syntax,
    # so they are replaced by placeholder tokens before parsing.
    words = [word.replace('(', '-LRB-').replace(')', '-RRB-') for word in words]
    input_sentence = benepar.InputSentence(words=words)
    tree = parser.parse(input_sentence)

    # Turn the placeholders back into backslash-escaped brackets.
    tree = (
        str(tree)
        .replace('-LRB-', '\\(')
        .replace('-RRB-', '\\)')
        .replace('-LSB-', '\\[')
        .replace('-RSB-', '\\]')
        # Also escape the parenthesis in node labels that start with "$(-".
        # NOTE(review): presumably the "$(" tag plus a suffix -- confirm
        # against the label set of the model.
        .replace('($(-', '($\\(-')
    )

    # put the whole parse tree on a single line
    tree = re.sub(r'\s+', ' ', tree.strip())
    # Drop the space that precedes each opening parenthesis.
    tree = re.sub(r' \(', '(', tree)
    return tree
|