opennlp.tools.postag.POSSample java code examples

/**
 * Parses a sentence given as {@code token_tag} pairs into a {@link POSSample}.
 * <p>
 * The input is split with {@link WhitespaceTokenizer}; every resulting part is
 * then divided at its last underscore, so the token part may itself contain
 * underscores.
 *
 * @param sentenceString the sentence in {@code token_tag} format
 * @return the sample built from the extracted tokens and tags
 * @throws InvalidFormatException if one of the parts contains no underscore
 */
public static POSSample parse(String sentenceString) throws InvalidFormatException {
 String[] pairs = WhitespaceTokenizer.INSTANCE.tokenize(sentenceString);
 int count = pairs.length;
 String[] sentence = new String[count];
 String[] tags = new String[count];
 int index = 0;
 for (String pair : pairs) {
  int separator = pair.lastIndexOf('_');
  if (separator < 0) {
   throw new InvalidFormatException("Cannot find \"_\" inside token '" + pair + "'!");
  }
  // Everything before the last underscore is the token, everything after it the tag.
  sentence[index] = pair.substring(0, separator);
  tags[index] = pair.substring(separator + 1);
  index++;
 }
 return new POSSample(sentence, tags);
}

 /**
  * Compares this sample with another object.
  * <p>
  * Two samples are equal when both their sentence arrays and their tag arrays
  * are equal element by element.
  *
  * @param obj the object to compare with
  * @return {@code true} if {@code obj} is this instance or a {@link POSSample}
  *         with the same sentence and tags, {@code false} otherwise
  */
 @Override
 public boolean equals(Object obj) {
  if (this == obj) {
   return true;
  }

  if (!(obj instanceof POSSample)) {
   // Not a POSSample (or null), and already known not to be this instance.
   return false;
  }

  POSSample other = (POSSample) obj;
  boolean sameSentence = Arrays.equals(getSentence(), other.getSentence());
  return sameSentence && Arrays.equals(getTags(), other.getTags());
 }
}

/**
 * Tests if it can parse an empty {@link String}.
 * <p>
 * Parsing the empty string must yield a sample without tokens and without tags.
 */
@Test
public void testParseEmptyString() throws InvalidFormatException {
 String sentence = "";
 POSSample sample = POSSample.parse(sentence);
 // JUnit takes the expected value first and the actual value second.
 Assert.assertEquals(0, sample.getSentence().length);
 Assert.assertEquals(0, sample.getTags().length);
}

/**
 * Creates the events for a single sample.
 * <p>
 * The sample's sentence, tags and additional context are handed, together with
 * {@code cg}, to {@code generateEvents}.
 *
 * @param sample the sample to create events for
 * @return an iterator over the generated events
 */
@Override
protected Iterator<Event> createEvents(POSSample sample) {
 return generateEvents(
   sample.getSentence(),
   sample.getTags(),
   sample.getAddictionalContext(),
   cg).iterator();
}

/**
 * Evaluates the given reference {@link POSSample} object.
 * <p>
 * The sentence of the reference sample is tagged with the {@link POSTagger},
 * passing along the sample's additional context. For every reference tag the
 * word accuracy score receives {@code 1} if the predicted tag at the same
 * position is equal, and {@code 0} otherwise.
 *
 * @param reference the reference {@link POSSample}.
 *
 * @return a {@link POSSample} made of the reference sentence and the predicted tags.
 */
@Override
protected POSSample processSample(POSSample reference) {
 String[] predictedTags = tagger.tag(reference.getSentence(), reference.getAddictionalContext());
 String[] referenceTags = reference.getTags();
 int position = 0;
 for (String expectedTag : referenceTags) {
  boolean correct = expectedTag.equals(predictedTags[position++]);
  wordAccuracy.add(correct ? 1 : 0);
 }
 return new POSSample(reference.getSentence(), predictedTags);
}

// Parse the sentence into a sample. If the sentence is not in the expected
// format, report it on standard output and continue with an empty sample.
POSSample sample;
try {
 sample = POSSample.parse(sentence);
} catch (InvalidFormatException e) {
 System.out.println("Error during parsing, ignoring sentence: " + sentence);
 // Fallback: a sample with no tokens and no tags.
 sample = new POSSample(new String[]{}, new String[]{});

/**
 * Tags and chunks every line of the line-wise stream and verifies the output.
 * <p>
 * The UTF-8 bytes of all produced chunk tags are fed into a message digest,
 * whose final value is compared against a fixed expected number.
 */
@Test
public void evalChunkerModel() throws Exception {
 MessageDigest digest = MessageDigest.getInstance(HASH_ALGORITHM);

 File posModelFile = new File(getOpennlpDataDir(), "models-sf/en-pos-perceptron.bin");
 POSTagger tagger = new POSTaggerME(new POSModel(posModelFile));

 File chunkerModelFile = new File(getOpennlpDataDir(), "models-sf/en-chunker.bin");
 Chunker chunker = new ChunkerME(new ChunkerModel(chunkerModelFile));

 try (ObjectStream<LeipzigTestSample> lines = createLineWiseStream()) {
  for (LeipzigTestSample line = lines.read(); line != null; line = lines.read()) {
   POSSample sentence = new POSSample(line.getText(), tagger.tag(line.getText()));
   for (String chunk : chunker.chunk(sentence.getSentence(), sentence.getTags())) {
    digest.update(chunk.getBytes(StandardCharsets.UTF_8));
   }
  }
 }

 BigInteger expected = new BigInteger("226003515785585284478071030961407561943");
 BigInteger actual = new BigInteger(1, digest.digest());
 Assert.assertEquals(expected, actual);
}

// Tag the tokenized line, pair the tokens with their tags and print the sample.
String[] tags = tagger.tag(whitespaceTokenizerLine);
POSSample sample = new POSSample(whitespaceTokenizerLine, tags);
System.out.println(sample);

 /**
  * Extracts the sentence from the given sample.
  *
  * @param sample the sample to read the sentence from
  * @return the value of {@code sample.getSentence()}
  */
 @Override
 protected String[] toSentence(POSSample sample) {
  String[] sentence = sample.getSentence();
  return sentence;
 }
}

String[] tags = tagger.tag(whitespaceTokenizerLine);
POSSample sample = new POSSample(whitespaceTokenizerLine, tags);
String words[] = sample.getSentence();

/**
 * Regenerates the events of a sequence using tags predicted by the given model.
 * <p>
 * A {@link POSTaggerME} is built around {@code model}, the sentence of the
 * sequence's source sample is tagged with it, and events are generated from the
 * sentence, the predicted tags and the sample's additional context.
 *
 * @param sequence the sequence whose source is a {@link POSSample}
 * @param model the model used to predict the tags
 * @return the generated events; the array has at least one slot per token of
 *         the sentence
 */
@SuppressWarnings("unchecked")
public Event[] updateContext(Sequence sequence, AbstractModel model) {
 Sequence<POSSample> pss = sequence;
 POSTagger tagger = new POSTaggerME(new POSModel("x-unspecified", model, null, new POSTaggerFactory()));
 POSSample source = pss.getSource();
 String[] sentence = source.getSentence();
 Object[] ac = source.getAddictionalContext();
 String[] tags = tagger.tag(sentence);
 // Return what toArray hands back: when the list holds more events than the
 // array has slots, toArray allocates a new array and leaves the one passed in
 // untouched, so returning the passed-in array would lose every event.
 return POSSampleEventStream.generateEvents(sentence, tags, ac, pcg)
   .toArray(new Event[sentence.length]);
}

/**
 * Tests if it can parse an empty token.
 * <p>
 * The second pair {@code _NNS} has nothing in front of the underscore, so the
 * second token of the parsed sample must be the empty string.
 */
@Test
public void testParseEmtpyToken() throws InvalidFormatException {
 String sentence = "the_DT _NNS";
 POSSample sample = POSSample.parse(sentence);
 // JUnit takes the expected value first and the actual value second.
 Assert.assertEquals("", sample.getSentence()[1]);
}

/**
 * Tests if it can parse an empty tag.
 * <p>
 * The second pair {@code stories_} has nothing after the underscore, so the
 * second tag of the parsed sample must be the empty string.
 */
@Test
public void testParseEmtpyTag() throws InvalidFormatException {
 String sentence = "the_DT stories_";
 POSSample sample = POSSample.parse(sentence);
 // JUnit takes the expected value first and the actual value second.
 Assert.assertEquals("", sample.getTags()[1]);
}

/**
 * Tests if it can parse a valid token_tag sentence.
 * <p>
 * The string form of the parsed sample must equal the input sentence.
 */
@Test
public void testParse() throws InvalidFormatException {
 String expected = "the_DT stories_NNS about_IN well-heeled_JJ "
   + "communities_NNS and_CC developers_NNS";
 String actual = POSSample.parse(expected).toString();
 Assert.assertEquals(expected, actual);
}

/**
 * Creates the gold sample by parsing a fixed {@code token_tag} sentence.
 *
 * @return the parsed {@link POSSample}
 * @throws InvalidFormatException if {@link POSSample#parse(String)} rejects the sentence
 */
public static POSSample createGoldSample() throws InvalidFormatException {
 final String goldSentence = "the_DT stories_NNS about_IN well-heeled_JJ " +
   "communities_NNS and_CC developers_NNS";
 POSSample goldSample = POSSample.parse(goldSentence);
 return goldSample;
}

/**
 * Returns the tags of the {@code sample} held by this object.
 * <p>
 * The {@code sentence} argument is not consulted.
 *
 * @param sentence the sentence to tag (unused)
 * @return the value of {@code sample.getTags()}
 */
public String[] tag(String[] sentence) {
 String[] sampleTags = sample.getTags();
 return sampleTags;
}

/**
 * Creates a sample from a sentence, its tags and an optional additional context.
 * <p>
 * The sentence and tag lists are copied and stored as unmodifiable lists, after
 * which {@code checkArguments()} is invoked. The additional context, when given,
 * is copied row by row, so this sample shares neither lists nor arrays with the
 * caller.
 *
 * @param sentence the tokens of the sentence
 * @param tags the tags of the sentence
 * @param additionalContext the additional context, or {@code null} if there is none
 */
public POSSample(List<String> sentence, List<String> tags,
  String[][] additionalContext) {
 // Copy before wrapping: an unmodifiable wrapper alone is only a view, so later
 // changes the caller makes to its own lists would show through in this sample.
 this.sentence = Collections.unmodifiableList(new java.util.ArrayList<String>(sentence));
 this.tags = Collections.unmodifiableList(new java.util.ArrayList<String>(tags));
 checkArguments();
 if (additionalContext == null) {
  this.additionalContext = null;
 } else {
  String[][] ac = new String[additionalContext.length][];
  for (int i = 0; i < ac.length; i++) {
   ac[i] = additionalContext[i].clone();
  }
  this.additionalContext = ac;
 }
}

/**
 * Evaluates the given reference {@link POSSample} object.
 * <p>
 * The sentence of the reference sample is tagged with the {@link POSTagger},
 * passing along the sample's additional context. For every reference tag the
 * word accuracy score receives {@code 1} if the predicted tag at the same
 * position is equal, and {@code 0} otherwise.
 *
 * @param reference the reference {@link POSSample}.
 *
 * @return a {@link POSSample} made of the reference sentence and the predicted tags.
 */
@Override
protected POSSample processSample(POSSample reference) {
 String[] predictedTags = tagger.tag(reference.getSentence(), reference.getAddictionalContext());
 String[] referenceTags = reference.getTags();
 int position = 0;
 for (String expectedTag : referenceTags) {
  boolean correct = expectedTag.equals(predictedTags[position++]);
  wordAccuracy.add(correct ? 1 : 0);
 }
 return new POSSample(reference.getSentence(), predictedTags);
}

/**
 * Tests that a {@link POSSample} survives Java serialization.
 * <p>
 * The gold sample is written to a byte array, read back, and the additional
 * context, sentence and tags of both samples are compared.
 */
@Test
public void testPOSSampleSerDe() throws IOException {
 POSSample posSample = createGoldSample();

 ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
 try (ObjectOutput out = new ObjectOutputStream(byteArrayOutputStream)) {
  out.writeObject(posSample);
  out.flush();
 }
 byte[] bytes = byteArrayOutputStream.toByteArray();

 POSSample deSerializedPOSSample;
 try (ObjectInput objectInput = new ObjectInputStream(new ByteArrayInputStream(bytes))) {
  deSerializedPOSSample = (POSSample) objectInput.readObject();
 } catch (ClassNotFoundException e) {
  // Fail with the real cause instead of swallowing it and tripping over a
  // null sample in the assertions below.
  throw new AssertionError("POSSample class could not be resolved during deserialization", e);
 }

 Assert.assertNotNull(deSerializedPOSSample);
 Assert.assertArrayEquals(posSample.getAddictionalContext(),
   deSerializedPOSSample.getAddictionalContext());
 Assert.assertArrayEquals(posSample.getSentence(), deSerializedPOSSample.getSentence());
 Assert.assertArrayEquals(posSample.getTags(), deSerializedPOSSample.getTags());
}

// Parse the sentence into a sample. If the sentence is not in the expected
// format, report it on standard output and continue with an empty sample.
POSSample sample;
try {
 sample = POSSample.parse(sentence);
} catch (InvalidFormatException e) {
 System.out.println("Error during parsing, ignoring sentence: " + sentence);
 // Fallback: a sample with no tokens and no tags.
 sample = new POSSample(new String[]{}, new String[]{});

Javadoc

Represents a POS-tagged sentence.

Most used methods

Popular in Java

Finding current android device location
getResourceAsStream (ClassLoader)
onRequestPermissionsResult (Fragment)
scheduleAtFixedRate (ScheduledExecutorService)
SQLException (java.sql)
An exception that indicates a failed JDBC operation. It provides the following information about pro
Format (java.text)
The base class for all formats. This is an abstract base class which specifies the protocol for clas
SimpleDateFormat (java.text)
Formats and parses dates in a locale-sensitive manner. Formatting turns a Date into a String, and pa
Set (java.util)
A Set is a data structure which does not allow duplicate elements.
Filter (javax.servlet)
A filter is an object that performs filtering tasks on either the request to a resource (a servlet o
JOptionPane (javax.swing)
Top Vim plugins

How to use POSSample in opennlp.tools.postag

Best Java code snippets using opennlp.tools.postag.POSSample (Showing top 20 results out of 315)

How to use
POSSample
in
opennlp.tools.postag