Spaces:

mohdelgaar
/

LingConv

Sleeping

App Files Files Community

LingConv / lng /L2SCA /stanford-parser-full-2014-01-04 /ParserDemo.java

mohdelgaar

upload lng

b028d48 3 months ago

raw

history blame

3.97 kB


	import java.util.Collection;
	import java.util.List;
	import java.io.StringReader;

	import edu.stanford.nlp.process.Tokenizer;
	import edu.stanford.nlp.process.TokenizerFactory;
	import edu.stanford.nlp.process.CoreLabelTokenFactory;
	import edu.stanford.nlp.process.DocumentPreprocessor;
	import edu.stanford.nlp.process.PTBTokenizer;
	import edu.stanford.nlp.ling.CoreLabel;
	import edu.stanford.nlp.ling.HasWord;
	import edu.stanford.nlp.ling.Sentence;
	import edu.stanford.nlp.trees.*;
	import edu.stanford.nlp.parser.lexparser.LexicalizedParser;

	/**
	 * Command-line demonstration of the Stanford lexicalized parser.
	 *
	 * <p>With a file name as the first argument, the file is sentence-split,
	 * tokenized and parsed ({@link #demoDP}). With no arguments, a couple of
	 * built-in example sentences are parsed instead ({@link #demoAPI}).
	 */
	class ParserDemo {

	  /** Location of the serialized English PCFG grammar handed to {@code loadModel}. */
	  private static final String ENGLISH_PCFG_MODEL =
	      "edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz";

	  /**
	   * The main method demonstrates the easiest way to load a parser.
	   * Simply call loadModel and specify the path of a serialized grammar
	   * model, which can be a file, a resource on the classpath, or even a URL.
	   * For example, this demonstrates loading from the models jar file, which
	   * you therefore need to include in the classpath for ParserDemo to work.
	   *
	   * @param args optional; if present, {@code args[0]} is the name of a text
	   *             file to parse, otherwise the built-in examples are used
	   */
	  public static void main(String[] args) {
	    LexicalizedParser lp = LexicalizedParser.loadModel(ENGLISH_PCFG_MODEL);
	    if (args.length > 0) {
	      demoDP(lp, args[0]);
	    } else {
	      demoAPI(lp);
	    }
	  }

	  /**
	   * demoDP demonstrates turning a file into tokens and then parse
	   * trees. Note that the trees are printed by calling pennPrint on
	   * the Tree object. It is also possible to pass a PrintWriter to
	   * pennPrint if you want to capture the output.
	   *
	   * <p>For every sentence, the parse tree is printed first, followed by
	   * the CC-processed typed dependencies derived from that tree.
	   *
	   * @param lp       an already loaded parser
	   * @param filename name of the file to read, segment and parse
	   */
	  public static void demoDP(LexicalizedParser lp, String filename) {
	    // This option shows loading, sentence-segmenting and tokenizing
	    // a file using DocumentPreprocessor.
	    TreebankLanguagePack tlp = new PennTreebankLanguagePack();
	    GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory();
	    // You could also create a tokenizer here (as below) and pass it
	    // to DocumentPreprocessor
	    for (List<HasWord> sentence : new DocumentPreprocessor(filename)) {
	      Tree parse = lp.apply(sentence);
	      parse.pennPrint();
	      System.out.println();

	      GrammaticalStructure gs = gsf.newGrammaticalStructure(parse);
	      // Parameterized (not raw) so the element type is kept, matching demoAPI.
	      List<TypedDependency> tdl = gs.typedDependenciesCCprocessed();
	      System.out.println(tdl);
	      System.out.println();
	    }
	  }

	  /**
	   * demoAPI demonstrates other ways of calling the parser with
	   * already tokenized text, or in some cases, raw text that needs to
	   * be tokenized as a single sentence. Output is handled with a
	   * TreePrint object. Note that the options used when creating the
	   * TreePrint can determine what results to print out. Once again,
	   * one can capture the output by passing a PrintWriter to
	   * TreePrint.printTree.
	   *
	   * @param lp an already loaded parser
	   */
	  public static void demoAPI(LexicalizedParser lp) {
	    // This option shows parsing a list of correctly tokenized words
	    String[] sent = { "This", "is", "an", "easy", "sentence", "." };
	    List<CoreLabel> rawWords = Sentence.toCoreLabelList(sent);
	    Tree parse = lp.apply(rawWords);
	    parse.pennPrint();
	    System.out.println();

	    // This option shows loading and using an explicit tokenizer
	    String sent2 = "This is another sentence.";
	    TokenizerFactory<CoreLabel> tokenizerFactory =
	        PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
	    Tokenizer<CoreLabel> tok =
	        tokenizerFactory.getTokenizer(new StringReader(sent2));
	    List<CoreLabel> rawWords2 = tok.tokenize();
	    // From here on, 'parse' refers to the tree of the second sentence.
	    parse = lp.apply(rawWords2);

	    TreebankLanguagePack tlp = new PennTreebankLanguagePack();
	    GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory();
	    GrammaticalStructure gs = gsf.newGrammaticalStructure(parse);
	    List<TypedDependency> tdl = gs.typedDependenciesCCprocessed();
	    System.out.println(tdl);
	    System.out.println();

	    // You can also use a TreePrint object to print trees and dependencies
	    TreePrint tp = new TreePrint("penn,typedDependenciesCollapsed");
	    tp.printTree(parse);
	  }

	  private ParserDemo() {} // static methods only

	}