File size: 736 Bytes
7884ed6
 
 
 
 
 
8778cfe
 
7884ed6
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
import re
import sys
import benepar
from huggingface_hub import hf_hub_download
    
# Location of the parser checkpoint on the Hugging Face Hub.
_MODEL_REPO_ID = "nielklug/enhg_parser"
_MODEL_FILENAME = 'new-convbert-german-europeana0_dev=83.03.pt'

# Rewrites applied, in this order, to the stringified parse result.
# The bracket placeholders become backslash-escaped brackets; the last entry
# runs after those and escapes a '(' that directly follows '($'.
# NOTE(review): '$(' looks like a punctuation tag whose token is a literal
# parenthesis -- confirm against the parser's tag set.
_TREE_REWRITES = (
    ('-LRB-', '\\('),
    ('-RRB-', '\\)'),
    ('-LSB-', '\\['),
    ('-RSB-', '\\]'),
    ('($(-', '($\\(-'),
)

# Lazily created parser shared by all calls to parse().
_PARSER = None


def _get_parser():
    """Return the shared parser, downloading and loading the model on first use."""
    global _PARSER
    if _PARSER is None:
        model_path = hf_hub_download(repo_id=_MODEL_REPO_ID, filename=_MODEL_FILENAME)
        _PARSER = benepar.Parser(model_path)
    return _PARSER


def parse(words):
    """Parse a pre-tokenized sentence and return its tree as a one-line string.

    Args:
        words: Iterable of token strings forming one sentence.

    Returns:
        The string form of the parse result, with bracket placeholders
        rewritten to backslash-escaped brackets, all whitespace runs collapsed
        to single spaces, and the space before every opening parenthesis
        removed.

    The model is fetched and loaded only on the first call; later calls reuse
    the same parser instead of reloading it for every sentence.
    """
    # Literal parentheses in tokens would clash with the bracketed tree
    # notation, so swap them for placeholders before parsing.
    escaped_words = [word.replace('(', '-LRB-').replace(')', '-RRB-') for word in words]
    input_sentence = benepar.InputSentence(words=escaped_words)
    tree = str(_get_parser().parse(input_sentence))

    for old, new in _TREE_REWRITES:
        tree = tree.replace(old, new)

    # put the whole parse tree on a single line
    tree = re.sub(r'\s+', ' ', tree.strip())
    tree = re.sub(r' \(', '(', tree)
    return tree