File size: 1,577 Bytes
b028d48
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
#!/usr/bin/env bash
#
# Defines standard configurations for training and evaluating the
# multilingual parsers (Arabic, Chinese, German, French). You can
# also train and test the English parsers with this script.
#
# For details on the language-specific options, see the javadocs and
# lexparser_lang.def.
#

# Memory limit handed to the JVM through -Xmx when the parser is launched.
mem=6g

# The first five positional arguments are mandatory; anything after them is
# treated as optional parser features.  When too few arguments are given,
# print the usage text to stderr and leave with a non-zero status so that
# calling scripts and pipelines can detect the misuse.
if (( $# < 5 )); then
  cat >&2 <<EOF
Usage: ${0##*/} lang len train_file test_file out_file features

  lang       : Language to parse (Arabic, English, Chinese, German, French)
  len        : Maximum length of the sentences to parse
  train_file : Training treebank file
  test_file  : Test treebank file (for evaluation)
  out_file   : Prefix for the output filename
  features   : Variable length list of optional parser features

Parser memory limit is currently: $mem

EOF
  exit 1
fi

# Give the five mandatory positional arguments descriptive names ...
lang="${1}"
len="${2}"
train_path="${3}"
test_file="${4}"
out_file="${5}"

# ... and discard them, leaving only the optional parser features in the
# positional parameters.
shift 5

# Language-specific configuration
#
# scriptdir is the directory this script was started from; it is used further
# down to build the Java classpath.
scriptdir=$(dirname -- "$0")

# Show which JavaNLP checkout is in use, then load the per-language settings
# from it.  The definitions file presumably provides tlp and lang_opts, which
# the parser invocation reads and which are not assigned anywhere else in this
# script.  Every expansion is quoted so that an install path containing
# whitespace or glob characters is passed through intact.
# NOTE(review): a failure to source the file is not fatal here; consider
# aborting instead of launching the parser without language settings.
printf '%s\n' "$JAVANLP_HOME"
# shellcheck source=/dev/null
source "$JAVANLP_HOME/projects/core/scripts/lexparser_lang.def"

# Setting classpath
#CLASSPATH="$CLASSPATH":"$scriptdir/*"

# Run the Stanford parser, handing it the training and test treebanks, the
# maximum sentence length and the output-file extension <out_file>.<len>.stp.
#
# lang_opts is deliberately left unquoted: it is presumably a
# whitespace-separated list of options that has to be split into separate
# arguments.  The remaining positional parameters (the optional features) are
# forwarded with "$@" so that each one reaches the parser as exactly one
# argument, without re-splitting on whitespace or glob expansion.
# shellcheck disable=SC2086
java -Xmx"$mem" -cp "$scriptdir/*:$CLASSPATH" \
  edu.stanford.nlp.parser.lexparser.LexicalizedParser \
  -maxLength "$len" \
  -tLPP "$tlp" $lang_opts "$@" \
  -writeOutputFiles \
  -outputFilesExtension "${out_file}.${len}.stp" \
  -outputFormat "penn" \
  -outputFormatOptions "removeTopBracket,includePunctuationDependencies" \
  -train "$train_path" \
  -test "$test_file"