/**
 * Parses a whitespace-separated sentence of {@code token_tag} pairs into a
 * {@link POSSample}, e.g. {@code "The_DT dog_NN"}.
 *
 * @param sentenceString the sentence to parse
 * @return the parsed {@link POSSample}
 * @throws InvalidFormatException if any token lacks an underscore separator
 */
public static POSSample parse(String sentenceString) throws InvalidFormatException {
  String[] tokenTags = WhitespaceTokenizer.INSTANCE.tokenize(sentenceString);

  String[] sentence = new String[tokenTags.length];
  String[] tags = new String[tokenTags.length];

  for (int i = 0; i < tokenTags.length; i++) {
    String tokenTag = tokenTags[i];
    // The tag follows the LAST '_', so the token itself may contain underscores.
    int split = tokenTag.lastIndexOf("_");
    if (split == -1) {
      throw new InvalidFormatException("Cannot find \"_\" inside token '" + tokenTag + "'!");
    }
    sentence[i] = tokenTag.substring(0, split);
    tags[i] = tokenTag.substring(split + 1);
  }

  return new POSSample(sentence, tags);
}
/**
 * Reads the next paragraph from the underlying AD corpus stream and converts
 * its parse tree into a {@link POSSample}.
 *
 * @return the next sample, or {@code null} once the stream is exhausted
 * @throws IOException if reading from the underlying stream fails
 */
public POSSample read() throws IOException {
  Sentence paragraph = this.adSentenceStream.read();
  if (paragraph == null) {
    return null;
  }

  List<String> sentence = new ArrayList<>();
  List<String> tags = new ArrayList<>();
  // Walk the syntax tree, collecting token/tag pairs into the two lists.
  process(paragraph.getRoot(), sentence, tags);

  return new POSSample(sentence, tags);
}
/**
 * Reads the next CoNLL-U sentence and maps each word line to a token/POS-tag
 * pair of a {@link POSSample}.
 *
 * @return the next sample, or {@code null} when no sentences remain
 * @throws IOException if the underlying sample stream fails
 */
@Override
public POSSample read() throws IOException {
  ConlluSentence sentence = samples.read();
  if (sentence == null) {
    return null;
  }

  List<String> tokens = new ArrayList<>();
  List<String> tags = new ArrayList<>();
  for (ConlluWordLine line : sentence.getWordLines()) {
    tokens.add(line.getForm());
    // Tag selection (universal vs. language-specific) is driven by 'tagset'.
    tags.add(line.getPosTag(tagset));
  }
  return new POSSample(tokens, tags);
}
}
/**
 * Reads the next parse from the underlying stream and converts its tag nodes
 * into a {@link POSSample}.
 *
 * @return the next sample, or {@code null} once the stream is exhausted
 * @throws IOException if reading from the underlying stream fails
 */
public POSSample read() throws IOException {
  Parse parse = samples.read();
  if (parse == null) {
    return null;
  }

  List<String> sentence = new ArrayList<>();
  List<String> tags = new ArrayList<>();
  for (Parse tagNode : parse.getTagNodes()) {
    sentence.add(tagNode.getCoveredText());
    tags.add(tagNode.getType());
  }
  return new POSSample(sentence, tags);
}
}
// NOTE(review): fragment without its enclosing method — 'sample' is assigned from
// read() and then immediately overwritten; presumably these two statements belong
// to different branches of the enclosing logic — TODO confirm against full context.
sample = read();
// Materialize the collected token/tag lists into the String[] pair POSSample expects.
sample = new POSSample(tokens.toArray(new String[tokens.size()]), tags.toArray(new String[tags.size()]));
// Best-effort recovery: report the unparsable sentence and substitute an empty
// sample so the surrounding stream keeps producing instead of failing.
System.out.println("Error during parsing, ignoring sentence: " + sentence);
sample = new POSSample(new String[]{}, new String[]{});
/**
 * Reads the next parse and flattens its tag nodes into parallel token/tag
 * arrays for a {@link POSSample}.
 *
 * @return the next sample, or {@code null} once the stream is exhausted
 * @throws IOException if reading from the underlying stream fails
 */
public POSSample read() throws IOException {
  Parse parse = samples.read();
  if (parse == null) {
    return null;
  }

  Parse[] nodes = parse.getTagNodes();
  String[] toks = new String[nodes.length];
  String[] preds = new String[nodes.length];

  int ti = 0;
  for (Parse tok : nodes) {
    toks[ti] = tok.getCoveredText();
    preds[ti] = tok.getType();
    ti++;
  }

  return new POSSample(toks, preds);
}
}
/**
 * Evaluates the given reference {@link POSSample} by re-tagging its sentence
 * with the {@link POSTagger} and scoring each predicted tag against the
 * reference tag, updating the word accuracy accumulator.
 *
 * @param reference the reference {@link POSSample}
 * @return the predicted {@link POSSample}
 */
@Override
protected POSSample processSample(POSSample reference) {
  // Note: getAddictionalContext() is the (misspelled) name of the actual API method.
  String[] predictedTags = tagger.tag(reference.getSentence(), reference.getAddictionalContext());
  String[] referenceTags = reference.getTags();

  for (int i = 0; i < referenceTags.length; i++) {
    // 1 for a match, 0 for a miss; the running mean of these is the word accuracy.
    wordAccuracy.add(referenceTags[i].equals(predictedTags[i]) ? 1 : 0);
  }

  return new POSSample(reference.getSentence(), predictedTags);
}
/**
 * Regression smoke test: POS-tags and chunks the Leipzig sample sentences with
 * the pre-trained models, then compares a digest over all emitted chunk tags
 * against a known-good value to detect any behavioral drift in tagger/chunker.
 */
@Test
public void evalChunkerModel() throws Exception {
  MessageDigest digest = MessageDigest.getInstance(HASH_ALGORITHM);

  // Pre-trained models are loaded from the external OpenNLP data directory.
  POSTagger tagger = new POSTaggerME(new POSModel(
      new File(getOpennlpDataDir(), "models-sf/en-pos-perceptron.bin")));

  Chunker chunker = new ChunkerME(new ChunkerModel(
      new File(getOpennlpDataDir(), "models-sf/en-chunker.bin")));

  try (ObjectStream<LeipzigTestSample> lines = createLineWiseStream()) {
    LeipzigTestSample line;
    while ((line = lines.read()) != null) {
      // Tag the pre-tokenized sentence, then chunk it from the tagged sample.
      POSSample sentence = new POSSample(line.getText(), tagger.tag(line.getText()));
      String[] chunks = chunker.chunk(sentence.getSentence(), sentence.getTags());
      for (String chunk : chunks) {
        digest.update(chunk.getBytes(StandardCharsets.UTF_8));
      }
    }
  }

  // Expected digest from a known-good run; any model/output change alters it.
  Assert.assertEquals(new BigInteger("226003515785585284478071030961407561943"),
      new BigInteger(1, digest.digest()));
}
// Tag the whitespace-tokenized input line and echo the sample (token_tag form)
// to stdout.
String[] tags = tagger.tag(whitespaceTokenizerLine);
POSSample sample = new POSSample(whitespaceTokenizerLine, tags);
System.out.println(sample.toString());
/**
 * Parses a sentence of whitespace-separated {@code token_tag} pairs into a
 * {@link POSSample}.
 *
 * @param sentenceString the sentence to parse
 * @return the parsed {@link POSSample}
 * @throws InvalidFormatException if a token contains no {@code '_'} separator
 */
public static POSSample parse(String sentenceString) throws InvalidFormatException {
  String[] tokenTags = WhitespaceTokenizer.INSTANCE.tokenize(sentenceString);

  int count = tokenTags.length;
  String[] sentence = new String[count];
  String[] tags = new String[count];

  int i = 0;
  for (String tokenTag : tokenTags) {
    // Split on the last underscore so tokens containing '_' still parse.
    int split = tokenTag.lastIndexOf("_");
    if (split < 0) {
      throw new InvalidFormatException("Cannot find \"_\" inside token '" + tokenTag + "'!");
    }
    sentence[i] = tokenTag.substring(0, split);
    tags[i] = tokenTag.substring(split + 1);
    i++;
  }

  return new POSSample(sentence, tags);
}
/**
 * Parses a whitespace-separated {@code token_tag} sentence into a
 * {@link POSSample}.
 *
 * @param sentenceString the sentence string
 * @return the resulting {@link POSSample}
 * @throws InvalidFormatException if any pair lacks the {@code '_'} separator
 */
public static POSSample parse(String sentenceString) throws InvalidFormatException {
  String[] tokenTags = WhitespaceTokenizer.INSTANCE.tokenize(sentenceString);

  String[] sentence = new String[tokenTags.length];
  String[] tags = new String[tokenTags.length];

  for (int i = 0; i < tokenTags.length; i++) {
    String pair = tokenTags[i];
    // Use the last underscore as the separator; tokens may contain '_'.
    int sep = pair.lastIndexOf("_");
    if (sep == -1) {
      throw new InvalidFormatException("Cannot find \"_\" inside token '" + pair + "'!");
    }
    sentence[i] = pair.substring(0, sep);
    tags[i] = pair.substring(sep + 1);
  }

  return new POSSample(sentence, tags);
}
/**
 * Reads the next paragraph from the AD sentence stream and converts it into a
 * {@link POSSample}.
 *
 * @return the next sample, or {@code null} when the stream is exhausted
 * @throws IOException if the underlying stream fails
 */
public POSSample read() throws IOException {
  Sentence paragraph;
  if ((paragraph = this.adSentenceStream.read()) == null) {
    return null;
  }

  List<String> sentence = new ArrayList<>();
  List<String> tags = new ArrayList<>();
  // Traverse the parse tree rooted at the paragraph, filling both lists.
  process(paragraph.getRoot(), sentence, tags);

  return new POSSample(sentence, tags);
}
/**
 * Converts the next AD paragraph into a {@link POSSample}.
 *
 * @return the next sample, or {@code null} at end of stream
 * @throws IOException if reading the underlying stream fails
 */
public POSSample read() throws IOException {
  Sentence paragraph = adSentenceStream.read();
  if (paragraph != null) {
    Node root = paragraph.getRoot();
    List<String> words = new ArrayList<>();
    List<String> posTags = new ArrayList<>();
    // Recursively collect token/tag pairs from the tree.
    process(root, words, posTags);
    return new POSSample(words, posTags);
  }
  return null;
}
/**
 * Converts the tag nodes of the next parse into a {@link POSSample}.
 *
 * @return the next sample, or {@code null} at end of stream
 * @throws IOException if reading the underlying stream fails
 */
public POSSample read() throws IOException {
  Parse parse = samples.read();
  if (parse != null) {
    List<String> words = new ArrayList<>();
    List<String> posTags = new ArrayList<>();
    for (Parse node : parse.getTagNodes()) {
      words.add(node.getCoveredText());
      posTags.add(node.getType());
    }
    return new POSSample(words, posTags);
  }
  return null;
}
}
/**
 * Maps the word lines of the next CoNLL-U sentence to a {@link POSSample}.
 *
 * @return the next sample, or {@code null} when the stream is drained
 * @throws IOException if the underlying sample stream fails
 */
@Override
public POSSample read() throws IOException {
  ConlluSentence sentence = samples.read();
  if (sentence == null) {
    return null;
  }

  List<String> tokens = new ArrayList<>();
  List<String> tags = new ArrayList<>();
  for (ConlluWordLine wordLine : sentence.getWordLines()) {
    tokens.add(wordLine.getForm());
    tags.add(wordLine.getPosTag(tagset));
  }

  return new POSSample(tokens, tags);
}
}
/**
 * Reads the next CoNLL-U sentence and builds a {@link POSSample} from its
 * word forms and POS tags.
 *
 * @return the next sample, or {@code null} when no sentences remain
 * @throws IOException if the underlying sample stream fails
 */
@Override
public POSSample read() throws IOException {
  ConlluSentence next = samples.read();
  if (next == null) {
    return null;
  }

  List<String> forms = new ArrayList<>();
  List<String> posTags = new ArrayList<>();
  for (ConlluWordLine wordLine : next.getWordLines()) {
    forms.add(wordLine.getForm());
    // The configured tagset selects which POS column is used.
    posTags.add(wordLine.getPosTag(tagset));
  }
  return new POSSample(forms, posTags);
}
}
/**
 * Builds a {@link POSSample} from the tag nodes of the next parse.
 *
 * @return the next sample, or {@code null} once the stream is exhausted
 * @throws IOException if reading from the underlying stream fails
 */
public POSSample read() throws IOException {
  Parse parse = samples.read();
  if (parse == null) {
    return null;
  }

  List<String> sentence = new ArrayList<>();
  List<String> tags = new ArrayList<>();
  // Each tag node contributes its covered text as token and its type as tag.
  for (Parse tagNode : parse.getTagNodes()) {
    sentence.add(tagNode.getCoveredText());
    tags.add(tagNode.getType());
  }

  return new POSSample(sentence, tags);
}
}
/**
 * Collects the tokens covered by the given sentence annotation, together with
 * their POS feature values, and appends the result as a {@link POSSample} to
 * {@code mPOSSamples}.
 *
 * @param tcas the CAS holding the annotations
 * @param sentence the sentence annotation bounding the tokens to collect
 */
private void process(CAS tcas, AnnotationFS sentence) {
  FSIndex<AnnotationFS> allTokens = tcas.getAnnotationIndex(mTokenType);
  ContainingConstraint containingConstraint = new ContainingConstraint(sentence);

  List<String> tokens = new ArrayList<>();
  List<String> tags = new ArrayList<>();

  // Iterate only over the tokens located inside the sentence annotation.
  Iterator<AnnotationFS> containingTokens =
      tcas.createFilteredIterator(allTokens.iterator(), containingConstraint);
  while (containingTokens.hasNext()) {
    AnnotationFS tokenAnnotation = containingTokens.next();
    tokens.add(tokenAnnotation.getCoveredText().trim());
    tags.add(tokenAnnotation.getFeatureValueAsString(mPOSFeature));
  }

  mPOSSamples.add(new POSSample(tokens, tags));
}
/**
 * Flattens the tag nodes of the next parse into parallel token and tag arrays
 * and returns them as a {@link POSSample}.
 *
 * @return the next sample, or {@code null} at end of stream
 * @throws IOException if reading from the underlying stream fails
 */
public POSSample read() throws IOException {
  Parse parse = samples.read();
  if (parse == null) {
    return null;
  }

  Parse[] tagNodes = parse.getTagNodes();
  int n = tagNodes.length;
  String[] tokens = new String[n];
  String[] tags = new String[n];

  for (int i = 0; i < n; i++) {
    tokens[i] = tagNodes[i].getCoveredText();
    tags[i] = tagNodes[i].getType();
  }

  return new POSSample(tokens, tags);
}
}