opennlp.tools.parser.treeinsert Java code examples

/**
 * Creates the three context generators held by this object: one for build
 * decisions, one for attach decisions and one for check decisions. The attach
 * and check generators are constructed with {@code punctSet}.
 */
@Override
public void init() {
 this.buildContextGenerator = new BuildContextGenerator();
 this.attachContextGenerator = new AttachContextGenerator(punctSet);
 this.checkContextGenerator = new CheckContextGenerator(punctSet);
}

/**
 * Unpacks an untyped argument bundle and forwards it to the typed
 * {@code getContext} overload.
 *
 * @param o an {@code Object[]} holding, in this order, a {@code Parse[]}, an
 *     {@code Integer}, a {@code List<Parse>} and a second {@code Integer}.
 * @return the result of the typed overload for those four arguments.
 */
public String[] getContext(Object o) {
 Object[] bundle = (Object[]) o;
 // Cast in positional order so a malformed bundle fails on the first bad slot.
 Parse[] first = (Parse[]) bundle[0];
 Integer second = (Integer) bundle[1];
 List<Parse> third = (List<Parse>) bundle[2];
 Integer fourth = (Integer) bundle[3];
 return getContext(first, second, third, fourth);
}

/**
 * Unpacks an untyped argument bundle and forwards it to the typed
 * {@code getContext} overload.
 *
 * @param o an {@code Object[]} holding a {@code Parse[]} at index 0 and an
 *     {@code Integer} at index 1.
 * @return the result of the typed overload for those two arguments.
 */
public String[] getContext(Object o) {
 Object[] bundle = (Object[]) o;
 Parse[] first = (Parse[]) bundle[0];
 Integer second = (Integer) bundle[1];
 return getContext(first, second);
}

Parse[] children = collapsePunctuation(originalChildren,punctSet);
int numNodes = children.length;
if (numNodes == 0) {
 if (!isBuilt(advanceNode)) {
  break;
int originalZeroIndex = mapParseIndex(0,children,originalChildren);
int originalAdvanceIndex = mapParseIndex(advanceNodeIndex,children,originalChildren);
List<Parse> newParsesList = new ArrayList<>();
buildModel.eval(buildContextGenerator.getContext(children, advanceNodeIndex), bprobs);
double doneProb = bprobs[doneIndex];
if (debugOn)
   newParsesList.add(newParse1);
   if (checkComplete) {
    cprobs = checkModel.eval(checkContextGenerator.getContext(newNode, children,
      advanceNodeIndex,false));
    if (debugOn) System.out.println("building " + tag + " " + bprob + " c=" + cprobs[completeIndex]);
    if (cprobs[completeIndex] > probMass) { //just complete advances
     setComplete(newNode);
     newParse1.addProb(Math.log(cprobs[completeIndex]));
     if (debugOn) System.out.println("Only advancing complete node");
     setIncomplete(newNode);
     newParse1.addProb(Math.log(1 - cprobs[completeIndex]));
     if (debugOn) System.out.println("Only advancing incomplete node");

 if (etype == ParserEventTypeEnum.BUILD) {
  parseEvents.add(new Event(parent.getType(),
    buildContextGenerator.getContext(currentChunks, ci)));
 if (lastChild(chunks[ci], parent)) {
  if (etype == ParserEventTypeEnum.CHECK) {
   parseEvents.add(new Event(Parser.COMPLETE,
     checkContextGenerator.getContext(currentChunks[ci],currentChunks, ci,false)));
  if (etype == ParserEventTypeEnum.CHECK) {
   parseEvents.add(new Event(Parser.INCOMPLETE,
     checkContextGenerator.getContext(currentChunks[ci],currentChunks,ci,false)));
parseEvents.add(new Event(Parser.DONE, buildContextGenerator.getContext(currentChunks, ci)));
List<Parse> currentRightFrontier = Parser.getRightFrontier(currentChunks[0],punctSet);
if (currentRightFrontier.size() != rightFrontier.size()) {
 System.err.println("fontiers mis-aligned: " + currentRightFrontier.size() + " != "
Map<Parse, Integer> parents = getNonAdjoinedParent(chunks[ci]);
  if (debug)
   System.err.println("Looking at attachment site (" + cfi + "): "
     + cfn.getType() + " ci=" + i + " cs=" + nonPunctChildCount(cfn)
     + ", " + cfn + " :for " + currentChunks[ci].getType() + " "
     + currentChunks[ci] + " -> " + parents);
  if (attachNode == null &&  i != null && i == nonPunctChildCount(cfn)) {
   attachType = Parser.ATTACH_DAUGHTER;

Parse[] children = Parser.collapsePunctuation(parent.getChildren(),punctSet);
Parse pstart = children[0];
Parse pend = children[children.length - 1];
String type = parent.getType();
checkcons(pstart, "begin", type, features);
checkcons(pend, "last", type, features);
String production = "p=" + production(parent,false);
String punctProduction = "pp=" + production(parent,true);
features.add(production);
features.add(punctProduction);
 rf = Parser.getRightFrontier(constituents[0], punctSet);
 if (trimFrontier) {
  int pi = rf.indexOf(parent);
getFrontierNodes(rf,leftNodes);
Parse p_1 = leftNodes[0];
Parse p_2 = leftNodes[1];
 p2 = constituents[index + 2];
surround(p_1, -1, type, p_1s, features);
surround(p_2, -2, type, p_2s, features);
surround(p1, 1, type, p1s, features);
surround(p2, 2, type, p2s, features);

 rf = Parser.getRightFrontier(constituents[0], emptyPunctSet);
getFrontierNodes(rf,leftNodes);
Parse p_1 = leftNodes[0];
Parse p_2 = leftNodes[1];
String consp_2 = cons(p_2, -2);
String consp_1 = cons(p_1, -1);
String consp0 = cons(p0, 0);
String consp1 = cons(p1, 1);
String consp2 = cons(p2, 2);
String consbop_2 = consbo(p_2, -2);
String consbop_1 = consbo(p_1, -1);
String consbop0 = consbo(p0, 0);
String consbop1 = consbo(p1, 1);
String consbop2 = consbo(p2, 2);
cons2(features,c0,c1,punct1s,true);
cons2(features,c_1,c0,punct_1s,true);
cons3(features,c0,c1,c2,punct1s,punct2s,true,true,true);
cons3(features,c_2,c_1,c0,punct_2s,punct_1s,true,true,true);
cons3(features,c_1,c0,c1,punct_1s,punct_1s,true,true,true);

/**
 * Convenience entry point that applies one cutoff and one iteration count to
 * every configured component and then delegates to the
 * {@link TrainingParameters}-based {@code train} overload.
 *
 * @param languageCode passed through unchanged to the delegate.
 * @param parseSamples passed through unchanged to the delegate.
 * @param rules passed through unchanged to the delegate.
 * @param iterations stored as {@code ITERATIONS_PARAM} for the "tagger",
 *     "chunker", "check" and "build" namespaces.
 * @param cut stored as {@code CUTOFF_PARAM} for the "dict" namespace and for the
 *     four namespaces listed above.
 * @return the model produced by the delegate.
 * @throws IOException declared because the delegate is invoked from here.
 */
public static ParserModel train(String languageCode,
  ObjectStream<Parse> parseSamples, HeadRules rules, int iterations, int cut)
  throws IOException {
 TrainingParameters settings = new TrainingParameters();

 // The dictionary namespace only receives a cutoff, no iteration count.
 settings.put("dict", TrainingParameters.CUTOFF_PARAM, cut);

 // NOTE(review): no "attach" namespace is populated here; if the delegate reads
 // one it will not see these values - confirm whether that is intended.
 String[] components = {"tagger", "chunker", "check", "build"};
 for (String component : components) {
  settings.put(component, TrainingParameters.CUTOFF_PARAM, cut);
  settings.put(component, TrainingParameters.ITERATIONS_PARAM, iterations);
 }

 return train(languageCode, parseSamples, rules, settings);
}

/**
 * Finds the position of a child among its parent's children after
 * {@code Parser.collapsePunctuation} has been applied with {@code punctSet}.
 * The comparison is by reference, not by {@code equals}.
 *
 * @param child the node to look for.
 * @param parent the node whose collapsed children are searched.
 * @return the zero-based position of the first match, or -1 when there is none.
 */
private int indexOf(Parse child, Parse parent) {
 Parse[] siblings = Parser.collapsePunctuation(parent.getChildren(), punctSet);
 int position = 0;
 while (position < siblings.length) {
  if (siblings[position] == child) {
   return position;
  }
  position++;
 }
 return -1;
}

/**
 * Labels the node as incomplete. When {@code isBuilt(p)} holds the label
 * becomes {@code BUILT + "." + INCOMPLETE}; otherwise it is plain
 * {@code INCOMPLETE}.
 *
 * @param p the node whose label is replaced.
 */
private void setIncomplete(Parse p) {
 String label = isBuilt(p)
   ? Parser.BUILT + "." + Parser.INCOMPLETE
   : Parser.INCOMPLETE;
 p.setLabel(label);
}

/**
 * Delegates to the superclass implementation and then calls
 * {@code setComplete} on every direct child of every returned parse.
 *
 * @param p passed through unchanged to the superclass.
 * @param minChunkScore passed through unchanged to the superclass.
 * @return the array returned by the superclass, after its children were marked.
 */
@Override
protected Parse[] advanceChunks(Parse p, double minChunkScore) {
 Parse[] advanced = super.advanceChunks(p, minChunkScore);
 for (Parse candidate : advanced) {
  for (Parse chunk : candidate.getChildren()) {
   setComplete(chunk);
  }
 }
 return advanced;
}

/**
 * Collects the immediate parent of the specified node and, for as long as the
 * most recently collected parent has the same type as the node below it, the
 * next ancestor up the tree as well.
 * <p>
 * Every collected ancestor is mapped to the value {@code indexOf} returns for
 * the node directly below it on the path back to the specified node.
 * <p>
 * The walk stops when it runs out of ancestors, so a node without a parent
 * yields an empty map instead of a {@link NullPointerException}.
 *
 * @param node The node whose parents are to be returned.
 * @return a map from each collected parent to the index of its child on the
 *     path to {@code node}; empty when {@code node} has no parent.
 */
private Map<Parse, Integer> getNonAdjoinedParent(Parse node) {
 Map<Parse, Integer> parents = new HashMap<>();
 Parse child = node;
 Parse parent = child.getParent();
 // Guard against climbing past the root: getParent() yields null there and
 // indexOf would dereference it.
 while (parent != null) {
  parents.put(parent, indexOf(child, parent));
  if (!parent.getType().equals(child.getType())) {
   // The type changed, so no further ancestors are collected.
   break;
  }
  child = parent;
  parent = parent.getParent();
 }
 return parents;
}

/**
 * Marks the node as built. A node without a label receives plain
 * {@code BUILT}; a labelled node receives {@code BUILT + "." + COMPLETE} when
 * {@code isComplete(p)} holds and {@code BUILT + "." + INCOMPLETE} otherwise.
 *
 * @param p the node whose label is replaced.
 */
private void setBuilt(Parse p) {
 if (p.getLabel() == null) {
  p.setLabel(Parser.BUILT);
  return;
 }
 String state = isComplete(p) ? Parser.COMPLETE : Parser.INCOMPLETE;
 p.setLabel(Parser.BUILT + "." + state);
}

/**
 * Instantiates the parser implementation that matches the model's parser type.
 *
 * @param model the model to wrap; its parser type selects the implementation.
 * @param beamSize passed through unchanged to the parser constructor.
 * @param advancePercentage passed through unchanged to the parser constructor.
 * @return a chunking parser for {@code ParserType.CHUNKING}, a tree-insert
 *     parser for {@code ParserType.TREEINSERT}.
 * @throws IllegalStateException if the model reports any other parser type,
 *     including {@code null}.
 */
public static Parser create(ParserModel model, int beamSize, double advancePercentage) {
 // Read the type once so every branch and the error message agree on it.
 ParserType type = model.getParserType();
 if (ParserType.CHUNKING.equals(type)) {
  return new opennlp.tools.parser.chunking.Parser(model, beamSize, advancePercentage);
 }
 if (ParserType.TREEINSERT.equals(type)) {
  return new opennlp.tools.parser.treeinsert.Parser(model, beamSize, advancePercentage);
 }
 // A null type must surface as the documented IllegalStateException rather
 // than as a NullPointerException from calling name() on it.
 throw new IllegalStateException("Unexpected ParserType: " +
   (type == null ? "null" : type.name()));
}

/**
 * Wires up a parser from its three models and the shared tagging and chunking
 * components. For each model the constructor stores the model, creates the
 * matching context generator, allocates a probability buffer sized to the
 * model's outcome count and looks up the outcome indices it needs.
 *
 * @param buildModel model consulted for build decisions; supplies {@code DONE}.
 * @param attachModel model consulted for attach decisions; supplies the
 *     sister, daughter and non-attach outcome indices.
 * @param checkModel model consulted for check decisions; supplies {@code COMPLETE}.
 * @param tagger passed to the superclass constructor.
 * @param chunker passed to the superclass constructor.
 * @param headRules passed to the superclass constructor.
 * @param beamSize passed to the superclass constructor.
 * @param advancePercentage passed to the superclass constructor.
 */
private Parser(MaxentModel buildModel, MaxentModel attachModel, MaxentModel checkModel,
        POSTagger tagger, Chunker chunker, HeadRules headRules, int beamSize,
        double advancePercentage) {
 super(tagger, chunker, headRules, beamSize, advancePercentage);

 // Build model.
 this.buildModel = buildModel;
 this.buildContextGenerator = new BuildContextGenerator();
 this.bprobs = new double[buildModel.getNumOutcomes()];
 this.doneIndex = buildModel.getIndex(DONE);

 // Attach model.
 this.attachModel = attachModel;
 this.attachContextGenerator = new AttachContextGenerator(punctSet);
 this.aprobs = new double[attachModel.getNumOutcomes()];
 this.sisterAttachIndex = attachModel.getIndex(ATTACH_SISTER);
 this.daughterAttachIndex = attachModel.getIndex(ATTACH_DAUGHTER);
 this.nonAttachIndex = attachModel.getIndex(NON_ATTACH);
 // Daughter attachment is listed before sister attachment.
 this.attachments = new int[] {this.daughterAttachIndex, this.sisterAttachIndex};

 // Check model.
 this.checkModel = checkModel;
 this.checkContextGenerator = new CheckContextGenerator(punctSet);
 this.cprobs = new double[checkModel.getNumOutcomes()];
 this.completeIndex = checkModel.getIndex(Parser.COMPLETE);
}

 /**
  * Smoke test: trains a model, parses one sentence with a parser created from
  * it and pushes the model through a serialize/deserialize round trip. The
  * test passes when none of these steps throws.
  */
 @Test
 public void testTreeInsertParserTraining() throws Exception {

  ObjectStream<Parse> trainingData = ParserTestUtil.openTestTrainingData();
  HeadRules rules = ParserTestUtil.createTestHeadRules();

  ParserModel trainedModel = Parser.train("eng", trainingData, rules, 100, 0);

  opennlp.tools.parser.Parser parser = ParserFactory.create(trainedModel);

  // Parse a sentence to make sure the code does not have a bug
  // that always fails with a runtime exception.
  Parse input = Parse.parseParse("She was just another freighter from the " +
    "States and she seemed as commonplace as her name .");
  parser.parse(input);

  // Serialize the model into memory ...
  ByteArrayOutputStream serialized = new ByteArrayOutputStream();
  trainedModel.serialize(serialized);
  serialized.close();

  // ... and read it back; only successful construction is checked.
  byte[] modelBytes = serialized.toByteArray();
  new ParserModel(new ByteArrayInputStream(modelBytes));

  // TODO: compare both models
 }
}

/**
 * Counts the children of a node after {@code Parser.collapsePunctuation} has
 * been applied to them with {@code punctSet}.
 *
 * @param node the node whose children are counted.
 * @return the length of the collapsed child array.
 */
private int nonPunctChildCount(Parse node) {
 Parse[] collapsed = Parser.collapsePunctuation(node.getChildren(), punctSet);
 return collapsed.length;
}
/* (comment truncated in the original excerpt) */

/**
 * Labels the node as complete. When {@code isBuilt(p)} holds the label becomes
 * {@code BUILT + "." + COMPLETE}; otherwise it is plain {@code COMPLETE}.
 *
 * @param p the node whose label is replaced.
 */
private void setComplete(Parse p) {
 // The current label is not needed here; isBuilt(p) alone decides the new one.
 if (!isBuilt(p)) {
  p.setLabel(Parser.COMPLETE);
 }
 else {
  p.setLabel(Parser.BUILT + "." + Parser.COMPLETE);
 }
}

/**
 * Runs an n-fold cross validation. For every partition a model is trained on
 * that partition's training stream, a parser created from it is evaluated on
 * the partition's test stream, and the resulting F-measure is merged into
 * {@code fmeasure}.
 *
 * @param samples the samples handed to the partitioner.
 * @param nFolds the number of folds handed to the partitioner.
 * @throws IOException declared because training and evaluation are invoked here.
 * @throws IllegalStateException if {@code parserType} is neither
 *     {@code CHUNKING} nor {@code TREEINSERT}.
 */
public void evaluate(ObjectStream<Parse> samples, int nFolds) throws IOException {
 CrossValidationPartitioner<Parse> partitioner = new CrossValidationPartitioner<>(samples, nFolds);
 while (partitioner.hasNext()) {
  CrossValidationPartitioner.TrainingSampleStream<Parse> trainingSampleStream = partitioner.next();
  ParserModel model;
  // Train on the fold's training stream. Reading the raw 'samples' stream here
  // would bypass the partitioner that owns it and defeat the train/test split.
  if (ParserType.CHUNKING.equals(parserType)) {
   model = opennlp.tools.parser.chunking.Parser.train(
     languageCode, trainingSampleStream, rules, params);
  }
  else if (ParserType.TREEINSERT.equals(parserType)) {
   model = opennlp.tools.parser.treeinsert.Parser.train(
     languageCode, trainingSampleStream, rules, params);
  }
  else {
   throw new IllegalStateException("Unexpected parser type: " + parserType);
  }
  ParserEvaluator evaluator = new ParserEvaluator(ParserFactory.create(model), monitors);
  evaluator.evaluate(trainingSampleStream.getTestSampleStream());
  fmeasure.mergeInto(evaluator.getFMeasure());
 }
}

/**
 * Returns the right frontier of the specified parse tree with nodes ordered from
 * deepest to shallowest.
 * <p>
 * When the root's type is {@code TOP_NODE} or {@code INC_NODE} the descent
 * starts at the first of its punctuation-collapsed children, otherwise at the
 * root itself. From there the last child is followed repeatedly until a node
 * reporting {@code isPosTag()} is reached; that node is not part of the result.
 *
 * @param root The root of the parse tree.
 * @param punctSet The set used when collapsing the root's children.
 * @return The right frontier of the specified parse tree.
 */
public static List<Parse> getRightFrontier(Parse root,Set<String> punctSet) {
 String rootType = root.getType();
 boolean startBelowRoot = AbstractBottomUpParser.TOP_NODE.equals(rootType)
   || AbstractBottomUpParser.INC_NODE.equals(rootType);
 Parse start = startBelowRoot
   ? collapsePunctuation(root.getChildren(), punctSet)[0]
   : root;

 List<Parse> frontier = new LinkedList<>();
 for (Parse current = start; !current.isPosTag(); ) {
  // Inserting at the head leaves the deepest node first.
  frontier.add(0, current);
  Parse[] kids = current.getChildren();
  current = kids[kids.length - 1];
 }
 return new ArrayList<>(frontier);
}

How to use opennlp.tools.parser.treeinsert

Best Java code snippets using opennlp.tools.parser.treeinsert (Showing top 20 results out of 315)