Spaces:

mohdelgaar
/

LingConv

Sleeping

upload lng

b028d48 about 2 months ago

No virus

1.56 kB

	#!/usr/bin/env bash
	#
	# Defines standard configurations for parsing with the
	# multilingual parsers (Arabic, Chinese, German, French).
	#
	# For English, it is easier to use lexparser.sh, although you can load
	# an English grammar with this script.
	#
	# For details on the language-specific options, see the javadocs and
	# lexparser_lang.def.
	#

	# Memory limit handed to the JVM (used as the -Xmx value when java is started).
	mem=3g

	# Five or more arguments are required: lang, len, grammar, out_file and at
	# least one input file. With fewer, print the usage text to stderr and exit
	# with a non-zero status so that callers can detect the misuse.
	if [ "$#" -lt 5 ]; then
	  {
	    # ${0##*/} strips the directory part of $0 without spawning basename and
	    # is safe for script paths that contain spaces.
	    echo "Usage: ${0##*/} lang len grammar out_file FILE..."
	    echo
	    echo ' lang : Language to parse (Arabic, English, Chinese, German, French)'
	    echo ' len : Maximum length of the sentences to parse'
	    echo ' grammar : Serialized grammar file (look in the models jar)'
	    echo ' out_file : Prefix for the output filename'
	    echo ' FILE : List of files to parse'
	    echo
	    echo 'To set additional parser options, modify parse_opts in lexparser_lang.def'
	    echo
	    echo 'Parser memory limit is currently:' "$mem"
	    echo
	  } >&2
	  exit 1
	fi

	# Setup command-line options
	# NOTE(review): lang is not referenced again in this script; presumably it is
	# read by lexparser_lang.def when that file is sourced below - confirm.
	lang=$1
	len=$2
	grammar=$3
	out_file=$4

	# Drop the four named arguments; "$@" now holds only the input files.
	shift 4

	# Language-specific configuration, loaded from the directory of this script.
	# The java command below uses tlp, lang_opts and parse_opts, none of which is
	# assigned here, so they are expected to come from this file.
	scriptdir=$(dirname -- "$0")
	# shellcheck source=/dev/null
	source "$scriptdir/lexparser_lang.def" || {
	  echo "Cannot load $scriptdir/lexparser_lang.def" >&2
	  exit 1
	}

	# Setting classpath
	# NOTE(review): java is started with an explicit -cp below, which takes
	# precedence over the CLASSPATH variable, so this assignment does not change
	# the classpath of that invocation. Kept as in the original.
	CLASSPATH="$CLASSPATH":"$scriptdir/*"

	# Run the Stanford parser
	# The classpath is every jar next to this script ("$scriptdir/*") plus an
	# empty entry after the trailing colon. lang_opts and parse_opts are left
	# unquoted on purpose: each may hold several options that must be split into
	# separate words. Grammar and input files are quoted so that paths containing
	# spaces or glob characters reach the parser unchanged.
	# shellcheck disable=SC2086
	java -Xmx"$mem" -cp "$scriptdir/*:" edu.stanford.nlp.parser.lexparser.LexicalizedParser -maxLength "$len" \
	  -tLPP "$tlp" $lang_opts $parse_opts -writeOutputFiles \
	  -outputFilesExtension "$out_file.$len.stp" -outputFormat "penn" \
	  -outputFormatOptions "removeTopBracket,includePunctuationDependencies" \
	  -loadFromSerializedFile "$grammar" "$@"