Spaces:
Sleeping
Sleeping
File size: 1,144 Bytes
b028d48 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 |
# Per-language settings for the edu.stanford.nlp.parser.lexparser scripts.
#
# Input:
#   lang        Language name, matched case-sensitively against the names
#               handled below. An unset, empty, or unrecognized value is not
#               an error: the defaults are simply left in place.
#
# Output (plain shell variables, always overwritten):
#   tlp         Fully-qualified name of the language's *TreebankParserParams
#               class; stays at the bare package name when lang is not
#               recognized.
#   lang_opts   Language specific options for both training and document
#               parsing.
#   parse_opts  Options for parsing input documents (used by
#               lexparser-lang.sh).

# Defaults: no extra options, package prefix only.
lang_opts=
parse_opts=
tlp=edu.stanford.nlp.parser.lexparser

# "${lang-}" is quoted so an empty value or one containing whitespace/globs
# cannot break the comparison, and uses the "-" default so that an unset lang
# is tolerated even when the calling script runs under `set -u`.
case "${lang-}" in
  Arabic)
    tlp="$tlp.ArabicTreebankParserParams"
    lang_opts="-encoding UTF-8 -arabicFactored"
    parse_opts="-tokenized"
    ;;
  ArabicUTM)
    tlp="$tlp.ArabicUTMTreebankParserParams"
    lang_opts="-encoding UTF-8 -arabicFactored"
    parse_opts="-tokenized"
    ;;
  English)
    # English needs no options beyond the defaults.
    tlp="$tlp.EnglishTreebankParserParams"
    ;;
  German)
    tlp="$tlp.NegraPennTreebankParserParams"
    lang_opts="-hMarkov 1 -vMarkov 2 -vSelSplitCutOff 300 -uwm 1 -unknownSuffixSize 2 -nodeCleanup 2"
    ;;
  Chinese)
    tlp="$tlp.ChineseTreebankParserParams"
    lang_opts="-chineseFactored -encoding GB18030"
    parse_opts="-tokenized -sentences newline -escaper edu.stanford.nlp.trees.international.pennchinese.ChineseEscaper"
    ;;
  French)
    tlp="$tlp.FrenchTreebankParserParams"
    lang_opts="-frenchFactored -encoding UTF-8"
    parse_opts="-tokenized"
    ;;
esac