Spaces:
Sleeping
Sleeping
File size: 1,564 Bytes
b028d48 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 |
#!/usr/bin/env bash
#
# Defines standard configurations for parsing with the
# multilingual parsers (Arabic, Chinese, German, French).
#
# For English, it is easier to use lexparser.sh, although you can load
# an English grammar with this script.
#
# For details on the language-specific options, see the javadocs and
# lexparser_lang.def.
#
# Memory limit
# Maximum Java heap size; handed to the JVM as -Xmx"$mem" when the parser runs.
mem=3g

# The script needs four fixed arguments (lang, len, grammar, out_file) plus at
# least one input file. On a bad invocation, print the usage text to stderr
# and exit with a non-zero status so calling scripts can detect the failure.
if (( $# < 5 )); then
  {
    # ${0##*/} strips the directory part of $0 without forking basename and
    # is safe for script paths that contain whitespace.
    echo "Usage: ${0##*/} lang len grammar out_file FILE..."
    echo
    echo ' lang : Language to parse (Arabic, English, Chinese, German, French)'
    echo ' len : Maximum length of the sentences to parse'
    echo ' grammar : Serialized grammar file (look in the models jar)'
    echo ' out_file : Prefix for the output filename'
    echo ' FILE : List of files to parse'
    echo
    echo 'To set additional parser options, modify parse_opts in lexparser_lang.def'
    echo
    echo 'Parser memory limit is currently:' "$mem"
    echo
  } >&2
  exit 1
fi
# Setup command-line options
# The first four positional arguments are fixed; after the shift, "$@" holds
# only the input files to parse.
# NOTE(review): lang is not referenced again in this script; presumably it is
# read by lexparser_lang.def (sourced below) -- confirm against that file.
lang=$1
len=$2
grammar=$3
out_file=$4
shift 4

# Language-specific configuration
# The definitions file is looked up next to this script. Fail early with a
# clear message if it is missing, instead of launching java with empty options.
scriptdir=$(dirname "$0")
if [[ ! -r "$scriptdir/lexparser_lang.def" ]]; then
  echo "Cannot read language definitions: $scriptdir/lexparser_lang.def" >&2
  exit 1
fi
# shellcheck source=/dev/null
source "$scriptdir/lexparser_lang.def"

# Setting classpath
# NOTE(review): the java command below passes an explicit -cp, which takes
# precedence over the CLASSPATH variable, so this assignment does not change
# the parser's class path. Kept for compatibility with the original script.
CLASSPATH="$CLASSPATH":"$scriptdir/*"

# Run the Stanford parser
# tlp, lang_opts and parse_opts are not set in this script; they are expected
# to come from lexparser_lang.def. lang_opts and parse_opts are left unquoted
# on purpose so that each may expand to several separate options. The grammar
# path and the input files are quoted so that names containing whitespace or
# glob characters reach java intact, one argument per file.
# shellcheck disable=SC2086
java -Xmx"$mem" -cp "$scriptdir/*:" \
  edu.stanford.nlp.parser.lexparser.LexicalizedParser \
  -maxLength "$len" \
  -tLPP "$tlp" $lang_opts $parse_opts \
  -writeOutputFiles \
  -outputFilesExtension "${out_file}.${len}.stp" \
  -outputFormat "penn" \
  -outputFormatOptions "removeTopBracket,includePunctuationDependencies" \
  -loadFromSerializedFile "$grammar" "$@"
|