mohdelgaar's picture
upload lng
b028d48
raw
history blame
No virus
3.18 kB
import java.io.IOException;
import java.io.StringReader;
import java.util.*;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.HasWord;
import edu.stanford.nlp.ling.Label;
import edu.stanford.nlp.ling.Word;
import edu.stanford.nlp.process.DocumentPreprocessor;
import edu.stanford.nlp.process.Tokenizer;
import edu.stanford.nlp.trees.*;
import edu.stanford.nlp.parser.lexparser.LexicalizedParser;
class ParserDemo2 {
/** This example shows a few more ways of providing input to a parser.
*
* Usage: ParserDemo2 [grammar [textFile]]
*/
public static void main(String[] args) throws IOException {
String grammar = args.length > 0 ? args[0] : "edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz";
String[] options = { "-maxLength", "80", "-retainTmpSubcategories" };
LexicalizedParser lp = LexicalizedParser.loadModel(grammar, options);
TreebankLanguagePack tlp = lp.getOp().langpack();
GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory();
Iterable<List<? extends HasWord>> sentences;
if (args.length > 1) {
DocumentPreprocessor dp = new DocumentPreprocessor(args[1]);
List<List<? extends HasWord>> tmp =
new ArrayList<List<? extends HasWord>>();
for (List<HasWord> sentence : dp) {
tmp.add(sentence);
}
sentences = tmp;
} else {
// Showing tokenization and parsing in code a couple of different ways.
String[] sent = { "This", "is", "an", "easy", "sentence", "." };
List<HasWord> sentence = new ArrayList<HasWord>();
for (String word : sent) {
sentence.add(new Word(word));
}
String sent2 = ("This is a slightly longer and more complex " +
"sentence requiring tokenization.");
// Use the default tokenizer for this TreebankLanguagePack
Tokenizer<? extends HasWord> toke =
tlp.getTokenizerFactory().getTokenizer(new StringReader(sent2));
List<? extends HasWord> sentence2 = toke.tokenize();
List<List<? extends HasWord>> tmp =
new ArrayList<List<? extends HasWord>>();
tmp.add(sentence);
tmp.add(sentence2);
sentences = tmp;
}
for (List<? extends HasWord> sentence : sentences) {
Tree parse = lp.parse(sentence);
parse.pennPrint();
System.out.println();
GrammaticalStructure gs = gsf.newGrammaticalStructure(parse);
List<TypedDependency> tdl = gs.typedDependenciesCCprocessed();
System.out.println(tdl);
System.out.println();
System.out.println("The words of the sentence:");
for (Label lab : parse.yield()) {
if (lab instanceof CoreLabel) {
System.out.println(((CoreLabel) lab).toString("{map}"));
} else {
System.out.println(lab);
}
}
System.out.println();
System.out.println(parse.taggedYield());
System.out.println();
}
// This method turns the String into a single sentence using the
// default tokenizer for the TreebankLanguagePack.
String sent3 = "This is one last test!";
lp.parse(sent3).pennPrint();
}
private ParserDemo2() {} // static methods only
}