File size: 1,144 Bytes
b028d48
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
# Selects Stanford lexparser settings for the language named in $lang.
#
# Reads:
#   lang        - one of Arabic, ArabicUTM, English, German, Chinese, French
#                 (exact, case-sensitive match).
# Sets:
#   tlp         - package prefix plus the language's *ParserParams class name
#   lang_opts   - options shared by training and document parsing
#   parse_opts  - options used only when parsing input documents
#
# An unset, empty, or unrecognized $lang leaves tlp at the bare package
# prefix and both option strings empty.

#Language specific options for both training and document parsing
lang_opts=

#Options for parsing input documents (used by lexparser-lang.sh)
parse_opts=

# Package prefix; the matching arm below appends the class name to it.
tlp=edu.stanford.nlp.parser.lexparser

# The selector is quoted and defaulted: an unset/empty value, or one that
# contains whitespace, simply matches no arm instead of breaking the test
# command ("unary operator expected" / "too many arguments"). `case` is also
# plain POSIX, unlike `==` inside `[ ]`.
case "${lang:-}" in
  Arabic)
    tlp="${tlp}.ArabicTreebankParserParams"
    lang_opts="-encoding UTF-8 -arabicFactored"
    parse_opts="-tokenized"
    ;;
  ArabicUTM)
    tlp="${tlp}.ArabicUTMTreebankParserParams"
    lang_opts="-encoding UTF-8 -arabicFactored"
    parse_opts="-tokenized"
    ;;
  English)
    # English needs no extra options; only the params class is selected.
    tlp="${tlp}.EnglishTreebankParserParams"
    ;;
  German)
    tlp="${tlp}.NegraPennTreebankParserParams"
    lang_opts="-hMarkov 1 -vMarkov 2 -vSelSplitCutOff 300 -uwm 1 -unknownSuffixSize 2 -nodeCleanup 2"
    ;;
  Chinese)
    tlp="${tlp}.ChineseTreebankParserParams"
    lang_opts="-chineseFactored -encoding GB18030"
    parse_opts="-tokenized -sentences newline -escaper edu.stanford.nlp.trees.international.pennchinese.ChineseEscaper"
    ;;
  French)
    tlp="${tlp}.FrenchTreebankParserParams"
    lang_opts="-frenchFactored -encoding UTF-8"
    parse_opts="-tokenized"
    ;;
esac