@Override public void annotate(Annotation annotation) { if (annotation.containsKey(CoreAnnotations.SentencesAnnotation.class)) { // parse a tree for each sentence for (CoreMap sentence: annotation.get(CoreAnnotations.SentencesAnnotation.class)) { List<CoreLabel> words = sentence.get(CoreAnnotations.TokensAnnotation.class); if (VERBOSE) { log.info("Parsing: " + words); } int maxSentenceLength = parser.getMaxSentenceLength(); // generate the constituent tree Tree tree; // initialized below if (maxSentenceLength <= 0 || words.size() < maxSentenceLength) { tree = parser.getBestParse(words); } else { tree = ParserUtils.xTree(words); } List<Tree> trees = Generics.newArrayList(1); trees.add(tree); ParserAnnotatorUtils.fillInParseAnnotations(VERBOSE, BUILD_GRAPHS, gsf, sentence, trees, GrammaticalStructure.Extras.NONE); } } else { throw new RuntimeException("unable to find sentences in: " + annotation); } }
// Hand the sentence and its parse tree to setMissingTags.
// NOTE(review): presumably fills in token tags that are absent using the tree - confirm against setMissingTags.
setMissingTags(sentence, tree);
public static void fillInParseAnnotations(boolean verbose, CoreMap sentence, Tree tree) { // make sure all tree nodes are CoreLabels // TODO: why isn't this always true? something fishy is going on ParserAnnotatorUtils.convertToCoreLabels(tree); // index nodes, i.e., add start and end token positions to all nodes // this is needed by other annotators down stream, e.g., the NFLAnnotator tree.indexSpans(0); sentence.set(TreeAnnotation.class, tree); if (verbose) { System.err.println("Tree is:"); tree.pennPrint(System.err); } // generate the dependency graph SemanticGraph deps = generateCollapsedDependencies(tree); SemanticGraph uncollapsedDeps = generateUncollapsedDependencies(tree); SemanticGraph ccDeps = generateCCProcessedDependencies(tree); if (verbose) { System.err.println("SDs:"); System.err.println(deps.toString("plain")); } sentence.set(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class, deps); sentence.set(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class, uncollapsedDeps); sentence.set(SemanticGraphCoreAnnotations.CollapsedCCProcessedDependenciesAnnotation.class, ccDeps); setMissingTags(sentence, tree); }
public void annotate(Annotation annotation) { if (annotation.containsKey(CoreAnnotations.SentencesAnnotation.class)) { // parse a tree for each sentence for (CoreMap sentence: annotation.get(CoreAnnotations.SentencesAnnotation.class)) { Tree tree = null; List<CoreLabel> words = sentence.get(CoreAnnotations.TokensAnnotation.class); if (VERBOSE) { System.err.println("Parsing: " + words); } // generate the constituent tree if(maxSentenceLength <= 0 || words.size() < maxSentenceLength) tree = parser.apply(words); else tree = ParserAnnotatorUtils.xTree(words); if (treeMap != null) { tree = treeMap.apply(tree); } ParserAnnotatorUtils.fillInParseAnnotations(VERBOSE, sentence, tree); } } else { throw new RuntimeException("unable to find sentences in: " + annotation); } }
/**
 * Builds the CC-processed dependency graph for a parse tree.
 *
 * <p>Delegates to {@code generateDependencies} with the flags
 * {@code (true, true, false, true, true)}.
 * NOTE(review): the meaning of each flag is defined by {@code generateDependencies},
 * which is not visible here; verify there before changing any of them.
 *
 * @param tree the parse tree to derive dependencies from
 * @return the graph produced by {@code generateDependencies}
 */
public static SemanticGraph generateCCProcessedDependencies(Tree tree) {
  SemanticGraph ccProcessed = generateDependencies(tree, true, true, false, true, true);
  return ccProcessed;
}
/**
 * Replaces every non-CoreLabel label in the tree with a CoreLabel of the same value.
 * CoreLabels are required because extra information, such as the token span, is
 * stored on them.
 *
 * @param tree root of the (sub)tree to convert; this node and all of its descendants
 *     are updated in place
 */
public static void convertToCoreLabels(Tree tree) {
  Label current = tree.label();
  boolean alreadyCoreLabel = current instanceof CoreLabel;
  if (!alreadyCoreLabel) {
    // Only the label's value is carried over to the replacement.
    CoreLabel replacement = new CoreLabel();
    replacement.setValue(current.value());
    tree.setLabel(replacement);
  }

  // Recurse so that the whole subtree ends up with CoreLabels.
  for (Tree child : tree.children()) {
    convertToCoreLabels(child);
  }
}
public void annotate(Annotation annotation) { if (annotation.containsKey(CoreAnnotations.SentencesAnnotation.class)) { // parse a tree for each sentence for (CoreMap sentence: annotation.get(CoreAnnotations.SentencesAnnotation.class)) { Tree tree = null; List<CoreLabel> words = sentence.get(CoreAnnotations.TokensAnnotation.class); if (VERBOSE) { System.err.println("Parsing: " + words); } int maxSentenceLength = parser.getMaxSentenceLength(); // generate the constituent tree if (maxSentenceLength <= 0 || words.size() < maxSentenceLength) { tree = parser.getBestParse(words); } else { tree = ParserAnnotatorUtils.xTree(words); } ParserAnnotatorUtils.fillInParseAnnotations(VERBOSE, sentence, tree); } } else { throw new RuntimeException("unable to find sentences in: " + annotation); } } }
/**
 * Builds the collapsed dependency graph for a parse tree.
 *
 * <p>Delegates to {@code generateDependencies} with the flags
 * {@code (true, false, false, true, true)}.
 * NOTE(review): the meaning of each flag is defined by {@code generateDependencies},
 * which is not visible here; verify there before changing any of them.
 *
 * @param tree the parse tree to derive dependencies from
 * @return the graph produced by {@code generateDependencies}
 */
public static SemanticGraph generateCollapsedDependencies(Tree tree) {
  SemanticGraph collapsed = generateDependencies(tree, true, false, false, true, true);
  return collapsed;
}
private void finishSentence(CoreMap sentence, List<Tree> trees) { if (treeMap != null) { List<Tree> mappedTrees = Generics.newLinkedList(); for (Tree tree : trees) { Tree mappedTree = treeMap.apply(tree); mappedTrees.add(mappedTree); } trees = mappedTrees; } ParserAnnotatorUtils.fillInParseAnnotations(VERBOSE, BUILD_GRAPHS, gsf, sentence, trees, extraDependencies); if (saveBinaryTrees) { TreeBinarizer binarizer = TreeBinarizer.simpleTreeBinarizer(parser.getTLPParams().headFinder(), parser.treebankLanguagePack()); Tree binarized = binarizer.transformTree(trees.get(0)); Trees.convertToCoreLabels(binarized); sentence.set(TreeCoreAnnotations.BinarizedTreeAnnotation.class, binarized); } // for some reason in some corner cases nodes aren't having sentenceIndex set // do a pass and make sure all nodes have sentenceIndex set SemanticGraph sg = sentence.get(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class); if (sg != null) { for (IndexedWord iw : sg.vertexSet()) { if (iw.get(CoreAnnotations.SentenceIndexAnnotation.class) == null && sentence.get(CoreAnnotations.SentenceIndexAnnotation.class) != null) { iw.setSentIndex(sentence.get(CoreAnnotations.SentenceIndexAnnotation.class)); } } } }
// Hand the sentence and its parse tree to setMissingTags.
// NOTE(review): presumably fills in token tags that are absent using the tree - confirm against setMissingTags.
setMissingTags(sentence, tree);
/**
 * Builds the uncollapsed dependency graph for a parse tree.
 *
 * <p>Delegates to {@code generateDependencies} with the flags
 * {@code (false, false, false, true, true)}.
 * NOTE(review): the meaning of each flag is defined by {@code generateDependencies},
 * which is not visible here; verify there before changing any of them.
 *
 * @param tree the parse tree to derive dependencies from
 * @return the graph produced by {@code generateDependencies}
 */
public static SemanticGraph generateUncollapsedDependencies(Tree tree) {
  SemanticGraph uncollapsed = generateDependencies(tree, false, false, false, true, true);
  return uncollapsed;
}
@Override public void annotate(Annotation annotation) { if (annotation.containsKey(CoreAnnotations.SentencesAnnotation.class)) { // parse a tree for each sentence for (CoreMap sentence: annotation.get(CoreAnnotations.SentencesAnnotation.class)) { List<CoreLabel> words = sentence.get(CoreAnnotations.TokensAnnotation.class); if (VERBOSE) { System.err.println("Parsing: " + words); } int maxSentenceLength = parser.getMaxSentenceLength(); // generate the constituent tree Tree tree; // initialized below if (maxSentenceLength <= 0 || words.size() < maxSentenceLength) { tree = parser.getBestParse(words); } else { tree = ParserUtils.xTree(words); } ParserAnnotatorUtils.fillInParseAnnotations(VERBOSE, BUILD_GRAPHS, gsf, sentence, tree); } } else { throw new RuntimeException("unable to find sentences in: " + annotation); } }
// Hand the sentence and its parse tree to setMissingTags.
// NOTE(review): presumably fills in token tags that are absent using the tree - confirm against setMissingTags.
setMissingTags(sentence, tree);
@Override public void annotate(Annotation annotation) { if (annotation.containsKey(CoreAnnotations.SentencesAnnotation.class)) { // parse a tree for each sentence for (CoreMap sentence: annotation.get(CoreAnnotations.SentencesAnnotation.class)) { List<CoreLabel> words = sentence.get(CoreAnnotations.TokensAnnotation.class); if (VERBOSE) { log.info("Parsing: " + words); } int maxSentenceLength = parser.getMaxSentenceLength(); // generate the constituent tree Tree tree; // initialized below if (maxSentenceLength <= 0 || words.size() < maxSentenceLength) { tree = parser.getBestParse(words); } else { tree = ParserUtils.xTree(words); } List<Tree> trees = Generics.newArrayList(1); trees.add(tree); ParserAnnotatorUtils.fillInParseAnnotations(VERBOSE, BUILD_GRAPHS, gsf, sentence, trees, GrammaticalStructure.Extras.NONE); } } else { throw new RuntimeException("unable to find sentences in: " + annotation); } }
// Hand the sentence and its parse tree to setMissingTags.
// NOTE(review): presumably fills in token tags that are absent using the tree - confirm against setMissingTags.
setMissingTags(sentence, tree);
// Record the parse-derived annotations for this sentence from a single tree.
// VERBOSE, BUILD_GRAPHS and gsf are defined outside this statement.
ParserAnnotatorUtils.fillInParseAnnotations(VERBOSE, BUILD_GRAPHS, gsf, sentence, tree);
private void finishSentence(CoreMap sentence, List<Tree> trees) { if (treeMap != null) { List<Tree> mappedTrees = Generics.newLinkedList(); for (Tree tree : trees) { Tree mappedTree = treeMap.apply(tree); mappedTrees.add(mappedTree); } trees = mappedTrees; } ParserAnnotatorUtils.fillInParseAnnotations(VERBOSE, BUILD_GRAPHS, gsf, sentence, trees, extraDependencies); if (saveBinaryTrees) { TreeBinarizer binarizer = TreeBinarizer.simpleTreeBinarizer(parser.getTLPParams().headFinder(), parser.treebankLanguagePack()); Tree binarized = binarizer.transformTree(trees.get(0)); Trees.convertToCoreLabels(binarized); sentence.set(TreeCoreAnnotations.BinarizedTreeAnnotation.class, binarized); } // for some reason in some corner cases nodes aren't having sentenceIndex set // do a pass and make sure all nodes have sentenceIndex set SemanticGraph sg = sentence.get(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class); if (sg != null) { for (IndexedWord iw : sg.vertexSet()) { if (iw.get(CoreAnnotations.SentenceIndexAnnotation.class) == null && sentence.get(CoreAnnotations.SentenceIndexAnnotation.class) != null) { iw.setSentIndex(sentence.get(CoreAnnotations.SentenceIndexAnnotation.class)); } } } }
// Build a grammatical structure factory from the language pack's punctuation word
// reject filter and typed-dependency head finder, then fill in the parse annotations
// for the single tree (wrapped in a list) with GrammaticalStructure.Extras.NONE.
// NOTE(review): the literal false/true presumably occupy the verbose and build-graphs
// parameter positions of fillInParseAnnotations - confirm against its signature.
GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory( tlp.punctuationWordRejectFilter(), tlp.typedDependencyHeadFinder()); ParserAnnotatorUtils.fillInParseAnnotations(false, true, gsf, sentence, asList(tree), GrammaticalStructure.Extras.NONE);