/**
 * Computes a hash code from the contents of the token, tag and lemma arrays.
 *
 * @return the combined hash of the three array hash codes.
 */
@Override
public int hashCode() {
  int tokensHash = Arrays.hashCode(getTokens());
  int tagsHash = Arrays.hashCode(getTags());
  int lemmasHash = Arrays.hashCode(getLemmas());

  // Same 31-based fold (seeded with 1) that Objects.hash applies to its arguments.
  int combined = 1;
  combined = 31 * combined + tokensHash;
  combined = 31 * combined + tagsHash;
  combined = 31 * combined + lemmasHash;
  return combined;
}
/**
 * Compares this sample with another object. Two {@link LemmaSample}s are equal
 * when their token, tag and lemma arrays have equal contents.
 *
 * @param obj the object to compare with.
 *
 * @return {@code true} if {@code obj} is this instance or a {@link LemmaSample}
 *         with equal tokens, tags and lemmas; {@code false} otherwise.
 */
@Override
public boolean equals(Object obj) {
  if (obj == this) {
    return true;
  }
  if (!(obj instanceof LemmaSample)) {
    return false;
  }

  LemmaSample other = (LemmaSample) obj;
  // Compared in the order tokens, tags, lemmas; the first mismatch decides.
  if (!Arrays.equals(getTokens(), other.getTokens())) {
    return false;
  }
  if (!Arrays.equals(getTags(), other.getTags())) {
    return false;
  }
  return Arrays.equals(getLemmas(), other.getLemmas());
}
} // closes the enclosing type body, whose declaration lies outside this excerpt
/**
 * Builds one {@link Event} per token of the given sample.
 *
 * The outcome of each event is the encoded lemma produced by
 * {@code LemmatizerME.encodeLemmas} for that position; the context comes from
 * the context generator, which receives the position, the tokens, the tags and
 * the encoded lemmas.
 *
 * @param sample the sample to convert, may be {@code null}.
 *
 * @return an iterator over the created events, or an empty iterator if
 *         {@code sample} is {@code null}.
 */
protected Iterator<Event> createEvents(LemmaSample sample) {
  if (sample == null) {
    return Collections.emptyListIterator();
  }

  String[] toksArray = sample.getTokens();
  String[] tagsArray = sample.getTags();
  String[] lemmasArray = LemmatizerME.encodeLemmas(toksArray, sample.getLemmas());

  // Reuse the token array fetched above instead of asking the sample for it a
  // second time just to learn its length; the list is sized up front.
  int tokenCount = toksArray.length;
  List<Event> events = new ArrayList<>(tokenCount);
  for (int ei = 0; ei < tokenCount; ei++) {
    String[] context = contextGenerator.getContext(ei, toksArray, tagsArray, lemmasArray);
    events.add(new Event(lemmasArray[ei], context));
  }
  return events.iterator();
}
} // closes the enclosing type body, whose declaration lies outside this excerpt
/**
 * Evaluates the given reference {@link LemmaSample} object.
 *
 * The tokens and tags of the reference are handed to the {@link Lemmatizer}.
 * Each predicted lemma is then compared with the reference lemma at the same
 * position, and the word accuracy score is updated with 1 for a match and
 * 0 for a mismatch.
 *
 * @param reference the reference {@link LemmaSample}.
 *
 * @return a {@link LemmaSample} made of the reference tokens and tags together
 *         with the predicted lemmas.
 */
@Override
protected LemmaSample processSample(LemmaSample reference) {
  String[] predictedLemmas = lemmatizer.lemmatize(reference.getTokens(), reference.getTags());
  String[] referenceLemmas = reference.getLemmas();

  for (int idx = 0; idx < referenceLemmas.length; idx++) {
    boolean matches = referenceLemmas[idx].equals(predictedLemmas[idx]);
    wordAccuracy.add(matches ? 1 : 0);
  }

  return new LemmaSample(reference.getTokens(), reference.getTags(), predictedLemmas);
}
public String[] lemmatize(String[] toks, String[] tags) { try { LemmaSample predsSample = mSampleStream.read(); // checks if the streams are sync for (int i = 0; i < toks.length; i++) { if (!toks[i].equals(predsSample.getTokens()[i]) || !tags[i].equals(predsSample.getTags()[i])) { throw new RuntimeException("The streams are not sync!" + "\n expected sentence: " + Arrays.toString(toks) + "\n expected tags: " + Arrays.toString(tags) + "\n predicted sentence: " + Arrays.toString(predsSample.getTokens()) + "\n predicted tags: " + Arrays.toString(predsSample.getTags())); } } return predsSample.getLemmas(); } catch (IOException e) { throw new RuntimeException(e); } }
@Override public Sequence read() throws IOException { LemmaSample sample = samples.read(); if (sample != null) { String[] sentence = sample.getTokens(); String[] tags = sample.getTags(); String[] preds = sample.getLemmas(); Event[] events = new Event[sentence.length]; for (int i = 0; i < sentence.length; i++) { // it is safe to pass the tags as previous tags because // the context generator does not look for non predicted tags String[] context = contextGenerator.getContext(i, sentence, tags, preds); events[i] = new Event(tags[i], context); } return new Sequence<>(events,sample); } return null; }
@Test public void testLemmaSampleSerDe() throws IOException { LemmaSample lemmaSample = createGoldSample(); ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream(); ObjectOutput out = new ObjectOutputStream(byteArrayOutputStream); out.writeObject(lemmaSample); out.flush(); byte[] bytes = byteArrayOutputStream.toByteArray(); ByteArrayInputStream byteArrayInputStream = new ByteArrayInputStream(bytes); ObjectInput objectInput = new ObjectInputStream(byteArrayInputStream); LemmaSample deSerializedLemmaSample = null; try { deSerializedLemmaSample = (LemmaSample) objectInput.readObject(); } catch (ClassNotFoundException e) { // do nothing } Assert.assertNotNull(deSerializedLemmaSample); Assert.assertArrayEquals(lemmaSample.getLemmas(), deSerializedLemmaSample.getLemmas()); Assert.assertArrayEquals(lemmaSample.getTokens(), deSerializedLemmaSample.getTokens()); Assert.assertArrayEquals(lemmaSample.getTags(), deSerializedLemmaSample.getTags()); }
/**
 * Computes a hash code from the contents of the token, tag and lemma arrays.
 *
 * @return the combined hash of the three array hash codes.
 */
@Override
public int hashCode() {
  int tokensHash = Arrays.hashCode(getTokens());
  int tagsHash = Arrays.hashCode(getTags());
  int lemmasHash = Arrays.hashCode(getLemmas());

  // Same 31-based fold (seeded with 1) that Objects.hash applies to its arguments.
  int combined = 1;
  combined = 31 * combined + tokensHash;
  combined = 31 * combined + tagsHash;
  combined = 31 * combined + lemmasHash;
  return combined;
}
/**
 * Computes a hash code from the contents of the token, tag and lemma arrays.
 *
 * @return the combined hash of the three array hash codes.
 */
@Override
public int hashCode() {
  int tokensHash = Arrays.hashCode(getTokens());
  int tagsHash = Arrays.hashCode(getTags());
  int lemmasHash = Arrays.hashCode(getLemmas());

  // Same 31-based fold (seeded with 1) that Objects.hash applies to its arguments.
  int combined = 1;
  combined = 31 * combined + tokensHash;
  combined = 31 * combined + tagsHash;
  combined = 31 * combined + lemmasHash;
  return combined;
}
/**
 * Checks that a {@link LemmaSample} hands back the tokens, tags and lemmas it
 * was constructed with.
 */
@Test
public void testRetrievingContent() {
  LemmaSample sample = new LemmaSample(createSentence(), createTags(), createLemmas());

  String[] expectedTokens = createSentence();
  String[] actualTokens = sample.getTokens();
  Assert.assertArrayEquals(expectedTokens, actualTokens);

  String[] expectedTags = createTags();
  String[] actualTags = sample.getTags();
  Assert.assertArrayEquals(expectedTags, actualTags);

  String[] expectedLemmas = createLemmas();
  String[] actualLemmas = sample.getLemmas();
  Assert.assertArrayEquals(expectedLemmas, actualLemmas);
}
/**
 * Compares this sample with another object. Two {@link LemmaSample}s are equal
 * when their token, tag and lemma arrays have equal contents.
 *
 * @param obj the object to compare with.
 *
 * @return {@code true} if {@code obj} is this instance or a {@link LemmaSample}
 *         with equal tokens, tags and lemmas; {@code false} otherwise.
 */
@Override
public boolean equals(Object obj) {
  if (obj == this) {
    return true;
  }
  if (!(obj instanceof LemmaSample)) {
    return false;
  }

  LemmaSample other = (LemmaSample) obj;
  // Compared in the order tokens, tags, lemmas; the first mismatch decides.
  if (!Arrays.equals(getTokens(), other.getTokens())) {
    return false;
  }
  if (!Arrays.equals(getTags(), other.getTags())) {
    return false;
  }
  return Arrays.equals(getLemmas(), other.getLemmas());
}
} // closes the enclosing type body, whose declaration lies outside this excerpt
/**
 * Compares this sample with another object. Two {@link LemmaSample}s are equal
 * when their token, tag and lemma arrays have equal contents.
 *
 * @param obj the object to compare with.
 *
 * @return {@code true} if {@code obj} is this instance or a {@link LemmaSample}
 *         with equal tokens, tags and lemmas; {@code false} otherwise.
 */
@Override
public boolean equals(Object obj) {
  if (obj == this) {
    return true;
  }
  if (!(obj instanceof LemmaSample)) {
    return false;
  }

  LemmaSample other = (LemmaSample) obj;
  // Compared in the order tokens, tags, lemmas; the first mismatch decides.
  if (!Arrays.equals(getTokens(), other.getTokens())) {
    return false;
  }
  if (!Arrays.equals(getTags(), other.getTags())) {
    return false;
  }
  return Arrays.equals(getLemmas(), other.getLemmas());
}
} // closes the enclosing type body, whose declaration lies outside this excerpt
/**
 * Builds one {@link Event} per token of the given sample.
 *
 * The outcome of each event is the encoded lemma produced by
 * {@code LemmatizerME.encodeLemmas} for that position; the context comes from
 * the context generator, which receives the position, the tokens, the tags and
 * the encoded lemmas.
 *
 * @param sample the sample to convert, may be {@code null}.
 *
 * @return an iterator over the created events, or an empty iterator if
 *         {@code sample} is {@code null}.
 */
protected Iterator<Event> createEvents(LemmaSample sample) {
  if (sample == null) {
    return Collections.emptyListIterator();
  }

  String[] toksArray = sample.getTokens();
  String[] tagsArray = sample.getTags();
  String[] lemmasArray = LemmatizerME.encodeLemmas(toksArray, sample.getLemmas());

  // Reuse the token array fetched above instead of asking the sample for it a
  // second time just to learn its length; the list is sized up front.
  int tokenCount = toksArray.length;
  List<Event> events = new ArrayList<>(tokenCount);
  for (int ei = 0; ei < tokenCount; ei++) {
    String[] context = contextGenerator.getContext(ei, toksArray, tagsArray, lemmasArray);
    events.add(new Event(lemmasArray[ei], context));
  }
  return events.iterator();
}
} // closes the enclosing type body, whose declaration lies outside this excerpt
/**
 * Builds one {@link Event} per token of the given sample.
 *
 * The outcome of each event is the encoded lemma produced by
 * {@code LemmatizerME.encodeLemmas} for that position; the context comes from
 * the context generator, which receives the position, the tokens, the tags and
 * the encoded lemmas.
 *
 * @param sample the sample to convert, may be {@code null}.
 *
 * @return an iterator over the created events, or an empty iterator if
 *         {@code sample} is {@code null}.
 */
protected Iterator<Event> createEvents(LemmaSample sample) {
  if (sample == null) {
    return Collections.emptyListIterator();
  }

  String[] toksArray = sample.getTokens();
  String[] tagsArray = sample.getTags();
  String[] lemmasArray = LemmatizerME.encodeLemmas(toksArray, sample.getLemmas());

  // Reuse the token array fetched above instead of asking the sample for it a
  // second time just to learn its length; the list is sized up front.
  int tokenCount = toksArray.length;
  List<Event> events = new ArrayList<>(tokenCount);
  for (int ei = 0; ei < tokenCount; ei++) {
    String[] context = contextGenerator.getContext(ei, toksArray, tagsArray, lemmasArray);
    events.add(new Event(lemmasArray[ei], context));
  }
  return events.iterator();
}
} // closes the enclosing type body, whose declaration lies outside this excerpt
/**
 * Evaluates the given reference {@link LemmaSample} object.
 *
 * The tokens and tags of the reference are handed to the {@link Lemmatizer}.
 * Each predicted lemma is then compared with the reference lemma at the same
 * position, and the word accuracy score is updated with 1 for a match and
 * 0 for a mismatch.
 *
 * @param reference the reference {@link LemmaSample}.
 *
 * @return a {@link LemmaSample} made of the reference tokens and tags together
 *         with the predicted lemmas.
 */
@Override
protected LemmaSample processSample(LemmaSample reference) {
  String[] predictedLemmas = lemmatizer.lemmatize(reference.getTokens(), reference.getTags());
  String[] referenceLemmas = reference.getLemmas();

  for (int idx = 0; idx < referenceLemmas.length; idx++) {
    boolean matches = referenceLemmas[idx].equals(predictedLemmas[idx]);
    wordAccuracy.add(matches ? 1 : 0);
  }

  return new LemmaSample(reference.getTokens(), reference.getTags(), predictedLemmas);
}
@Override public Sequence read() throws IOException { LemmaSample sample = samples.read(); if (sample != null) { String[] sentence = sample.getTokens(); String[] tags = sample.getTags(); String[] preds = sample.getLemmas(); Event[] events = new Event[sentence.length]; for (int i = 0; i < sentence.length; i++) { // it is safe to pass the tags as previous tags because // the context generator does not look for non predicted tags String[] context = contextGenerator.getContext(i, sentence, tags, preds); events[i] = new Event(tags[i], context); } return new Sequence<>(events,sample); } return null; }
@Override public Sequence read() throws IOException { LemmaSample sample = samples.read(); if (sample != null) { String[] sentence = sample.getTokens(); String[] tags = sample.getTags(); String[] preds = sample.getLemmas(); Event[] events = new Event[sentence.length]; for (int i = 0; i < sentence.length; i++) { // it is safe to pass the tags as previous tags because // the context generator does not look for non predicted tags String[] context = contextGenerator.getContext(i, sentence, tags, preds); events[i] = new Event(tags[i], context); } return new Sequence<>(events,sample); } return null; }