/**
 * Reads the next sentence from the underlying line stream and returns it as a
 * {@link LemmaSample}.
 *
 * Every non-empty line is split on tab characters and must yield exactly three
 * columns, which are collected in order as the first, second and third array
 * handed to the {@link LemmaSample}. Lines with any other column count are
 * reported on {@code System.err} and skipped. An empty line ends the current
 * sentence. Empty lines met before any valid line has been collected are
 * skipped, so repeated separators or a sentence consisting only of corrupt
 * lines do not make the stream look exhausted while lines are still pending.
 *
 * @return the next sample, or {@code null} once the underlying stream returns
 *         {@code null} and no valid line has been collected.
 *
 * @throws IOException propagated from reading the underlying {@code samples} stream.
 */
public LemmaSample read() throws IOException {
  List<String> toks = new ArrayList<>();
  List<String> tags = new ArrayList<>();
  List<String> preds = new ArrayList<>();

  String line;
  while ((line = samples.read()) != null) {
    if (line.isEmpty()) {
      if (toks.isEmpty()) {
        // Nothing collected yet: returning null here would be taken as end of
        // stream by the caller although more lines may follow, so keep reading.
        continue;
      }
      // A blank line after collected content closes the sentence.
      break;
    }

    String[] parts = line.split("\t");
    if (parts.length != 3) {
      System.err.println("Skipping corrupt line: " + line);
    } else {
      toks.add(parts[0]);
      tags.add(parts[1]);
      preds.add(parts[2]);
    }
  }

  if (toks.isEmpty()) {
    // Only reachable when the underlying stream itself returned null.
    return null;
  }
  return new LemmaSample(toks.toArray(new String[0]),
      tags.toArray(new String[0]),
      preds.toArray(new String[0]));
}
}
/**
 * Converts the next {@link ConlluSentence} of the underlying stream into a
 * {@link LemmaSample}.
 *
 * For each word line of the sentence the form, the POS tag looked up with
 * {@code tagset} and the lemma are appended to three parallel lists, which are
 * then passed to the {@link LemmaSample} constructor in that order.
 *
 * @return the converted sample, or {@code null} if the underlying stream
 *         returned {@code null}.
 *
 * @throws IOException propagated from reading the underlying {@code samples} stream.
 */
@Override
public LemmaSample read() throws IOException {
  ConlluSentence next = samples.read();
  if (next == null) {
    return null;
  }

  List<String> forms = new ArrayList<>();
  List<String> posTags = new ArrayList<>();
  List<String> lemmata = new ArrayList<>();
  for (ConlluWordLine word : next.getWordLines()) {
    forms.add(word.getForm());
    posTags.add(word.getPosTag(tagset));
    lemmata.add(word.getLemma());
  }

  return new LemmaSample(forms, posTags, lemmata);
}
}
/**
 * Constructing a {@link LemmaSample} from one token, one tag and two lemmas is
 * expected to fail with an {@link IllegalArgumentException}.
 */
@Test(expected = IllegalArgumentException.class)
public void testParameterValidation() {
  String[] singleToken = {""};
  String[] singleTag = {""};
  // Deliberately one entry longer than the token and tag arrays.
  String[] twoLemmas = {"test", "one element to much"};

  new LemmaSample(singleToken, singleTag, twoLemmas);
}
return new LemmaSample(toks.toArray(new String[toks.size()]), posTags.toArray(new String[posTags.size()]), predictedLemmas.toArray(new String[predictedLemmas.size()])); } else return new LemmaSample(toks.toArray(new String[toks.size()]), posTags.toArray(new String[posTags.size()]), goldLemmas.toArray(new String[goldLemmas.size()]));
/**
 * Builds a sample from {@code createSentence()}, {@code createTags()} and the
 * lemmas of {@code createLemmas()} with the entry at index 5 replaced by
 * {@code "figure"}.
 *
 * @return the sample carrying the modified lemma array.
 */
public static LemmaSample createPredSample() {
  String[] predicted = createLemmas();
  predicted[5] = "figure";

  String[] sentence = createSentence();
  String[] tags = createTags();
  return new LemmaSample(sentence, tags, predicted);
}
/**
 * Builds a sample from the unmodified results of {@code createSentence()},
 * {@code createTags()} and {@code createLemmas()}.
 *
 * @return the newly constructed sample.
 */
public static LemmaSample createGoldSample() {
  LemmaSample gold = new LemmaSample(
      createSentence(),
      createTags(),
      createLemmas());
  return gold;
}
/**
 * Scores the {@link Lemmatizer} against one reference {@link LemmaSample}.
 *
 * The tokens and tags of the reference are handed to the {@link Lemmatizer}.
 * Each reference lemma is then compared with the predicted lemma at the same
 * index: a match adds {@code 1} to the word accuracy, a mismatch adds {@code 0}.
 *
 * @param reference the reference {@link LemmaSample}.
 *
 * @return a {@link LemmaSample} holding the reference tokens and tags together
 *         with the predicted lemmas.
 */
@Override
protected LemmaSample processSample(LemmaSample reference) {
  String[] predicted = lemmatizer.lemmatize(reference.getTokens(), reference.getTags());
  String[] gold = reference.getLemmas();

  int position = 0;
  while (position < gold.length) {
    boolean match = gold[position].equals(predicted[position]);
    wordAccuracy.add(match ? 1 : 0);
    position++;
  }

  return new LemmaSample(reference.getTokens(), reference.getTags(), predicted);
}
posSample.getTags()); System.out.println(new LemmaSample(posSample.getSentence(), posSample.getTags(), lemmas).toString());
/**
 * Checks the text produced by {@code LemmaSample.toString()}: for every index
 * of the sentence array the next line must split on tabs into exactly three
 * columns equal to the token, tag and lemma at that index.
 */
@Test
public void testToString() throws IOException {
  LemmaSample sample = new LemmaSample(createSentence(), createTags(), createLemmas());

  String[] expectedTokens = createSentence();
  String[] expectedTags = createTags();
  String[] expectedLemmas = createLemmas();

  BufferedReader lines = new BufferedReader(new StringReader(sample.toString()));

  int row = 0;
  while (row < expectedTokens.length) {
    String[] columns = lines.readLine().split("\t");

    Assert.assertEquals(3, columns.length);
    Assert.assertEquals(expectedTokens[row], columns[0]);
    Assert.assertEquals(expectedTags[row], columns[1]);
    Assert.assertEquals(expectedLemmas[row], columns[2]);

    row++;
  }
}
/**
 * Checks that the getters of a {@link LemmaSample} return arrays equal to the
 * tokens, tags and lemmas it was constructed with.
 */
@Test
public void testRetrievingContent() {
  LemmaSample subject = new LemmaSample(createSentence(), createTags(), createLemmas());

  String[] expectedTokens = createSentence();
  Assert.assertArrayEquals(expectedTokens, subject.getTokens());

  String[] expectedTags = createTags();
  Assert.assertArrayEquals(expectedTags, subject.getTags());

  String[] expectedLemmas = createLemmas();
  Assert.assertArrayEquals(expectedLemmas, subject.getLemmas());
}
/**
 * Reads the next sentence from the underlying line stream and returns it as a
 * {@link LemmaSample}.
 *
 * Every non-empty line is split on tab characters and must yield exactly three
 * columns, which are collected in order as the first, second and third array
 * handed to the {@link LemmaSample}. Lines with any other column count are
 * reported on {@code System.err} and skipped. An empty line ends the current
 * sentence. Empty lines met before any valid line has been collected are
 * skipped, so repeated separators or a sentence consisting only of corrupt
 * lines do not make the stream look exhausted while lines are still pending.
 *
 * @return the next sample, or {@code null} once the underlying stream returns
 *         {@code null} and no valid line has been collected.
 *
 * @throws IOException propagated from reading the underlying {@code samples} stream.
 */
public LemmaSample read() throws IOException {
  List<String> toks = new ArrayList<>();
  List<String> tags = new ArrayList<>();
  List<String> preds = new ArrayList<>();

  String line;
  while ((line = samples.read()) != null) {
    if (line.isEmpty()) {
      if (toks.isEmpty()) {
        // Nothing collected yet: returning null here would be taken as end of
        // stream by the caller although more lines may follow, so keep reading.
        continue;
      }
      // A blank line after collected content closes the sentence.
      break;
    }

    String[] parts = line.split("\t");
    if (parts.length != 3) {
      System.err.println("Skipping corrupt line: " + line);
    } else {
      toks.add(parts[0]);
      tags.add(parts[1]);
      preds.add(parts[2]);
    }
  }

  if (toks.isEmpty()) {
    // Only reachable when the underlying stream itself returned null.
    return null;
  }
  return new LemmaSample(toks.toArray(new String[0]),
      tags.toArray(new String[0]),
      preds.toArray(new String[0]));
}
}
/**
 * Reads the next sentence from the underlying line stream and returns it as a
 * {@link LemmaSample}.
 *
 * Every non-empty line is split on tab characters and must yield exactly three
 * columns, which are collected in order as the first, second and third array
 * handed to the {@link LemmaSample}. Lines with any other column count are
 * reported on {@code System.err} and skipped. An empty line ends the current
 * sentence. Empty lines met before any valid line has been collected are
 * skipped, so repeated separators or a sentence consisting only of corrupt
 * lines do not make the stream look exhausted while lines are still pending.
 *
 * @return the next sample, or {@code null} once the underlying stream returns
 *         {@code null} and no valid line has been collected.
 *
 * @throws IOException propagated from reading the underlying {@code samples} stream.
 */
public LemmaSample read() throws IOException {
  List<String> toks = new ArrayList<>();
  List<String> tags = new ArrayList<>();
  List<String> preds = new ArrayList<>();

  String line;
  while ((line = samples.read()) != null) {
    if (line.isEmpty()) {
      if (toks.isEmpty()) {
        // Nothing collected yet: returning null here would be taken as end of
        // stream by the caller although more lines may follow, so keep reading.
        continue;
      }
      // A blank line after collected content closes the sentence.
      break;
    }

    String[] parts = line.split("\t");
    if (parts.length != 3) {
      System.err.println("Skipping corrupt line: " + line);
    } else {
      toks.add(parts[0]);
      tags.add(parts[1]);
      preds.add(parts[2]);
    }
  }

  if (toks.isEmpty()) {
    // Only reachable when the underlying stream itself returned null.
    return null;
  }
  return new LemmaSample(toks.toArray(new String[0]),
      tags.toArray(new String[0]),
      preds.toArray(new String[0]));
}
}
/**
 * Converts the next {@link ConlluSentence} of the underlying stream into a
 * {@link LemmaSample}.
 *
 * For each word line of the sentence the form, the POS tag looked up with
 * {@code tagset} and the lemma are appended to three parallel lists, which are
 * then passed to the {@link LemmaSample} constructor in that order.
 *
 * @return the converted sample, or {@code null} if the underlying stream
 *         returned {@code null}.
 *
 * @throws IOException propagated from reading the underlying {@code samples} stream.
 */
@Override
public LemmaSample read() throws IOException {
  ConlluSentence next = samples.read();
  if (next == null) {
    return null;
  }

  List<String> forms = new ArrayList<>();
  List<String> posTags = new ArrayList<>();
  List<String> lemmata = new ArrayList<>();
  for (ConlluWordLine word : next.getWordLines()) {
    forms.add(word.getForm());
    posTags.add(word.getPosTag(tagset));
    lemmata.add(word.getLemma());
  }

  return new LemmaSample(forms, posTags, lemmata);
}
}
/**
 * Converts the next {@link ConlluSentence} of the underlying stream into a
 * {@link LemmaSample}.
 *
 * For each word line of the sentence the form, the POS tag looked up with
 * {@code tagset} and the lemma are appended to three parallel lists, which are
 * then passed to the {@link LemmaSample} constructor in that order.
 *
 * @return the converted sample, or {@code null} if the underlying stream
 *         returned {@code null}.
 *
 * @throws IOException propagated from reading the underlying {@code samples} stream.
 */
@Override
public LemmaSample read() throws IOException {
  ConlluSentence next = samples.read();
  if (next == null) {
    return null;
  }

  List<String> forms = new ArrayList<>();
  List<String> posTags = new ArrayList<>();
  List<String> lemmata = new ArrayList<>();
  for (ConlluWordLine word : next.getWordLines()) {
    forms.add(word.getForm());
    posTags.add(word.getPosTag(tagset));
    lemmata.add(word.getLemma());
  }

  return new LemmaSample(forms, posTags, lemmata);
}
}
return new LemmaSample(words, tags, lemmas);
/**
 * Scores the {@link Lemmatizer} against one reference {@link LemmaSample}.
 *
 * The tokens and tags of the reference are handed to the {@link Lemmatizer}.
 * Each reference lemma is then compared with the predicted lemma at the same
 * index: a match adds {@code 1} to the word accuracy, a mismatch adds {@code 0}.
 *
 * @param reference the reference {@link LemmaSample}.
 *
 * @return a {@link LemmaSample} holding the reference tokens and tags together
 *         with the predicted lemmas.
 */
@Override
protected LemmaSample processSample(LemmaSample reference) {
  String[] predicted = lemmatizer.lemmatize(reference.getTokens(), reference.getTags());
  String[] gold = reference.getLemmas();

  int position = 0;
  while (position < gold.length) {
    boolean match = gold[position].equals(predicted[position]);
    wordAccuracy.add(match ? 1 : 0);
    position++;
  }

  return new LemmaSample(reference.getTokens(), reference.getTags(), predicted);
}
/**
 * Scores the {@link Lemmatizer} against one reference {@link LemmaSample}.
 *
 * The tokens and tags of the reference are handed to the {@link Lemmatizer}.
 * Each reference lemma is then compared with the predicted lemma at the same
 * index: a match adds {@code 1} to the word accuracy, a mismatch adds {@code 0}.
 *
 * @param reference the reference {@link LemmaSample}.
 *
 * @return a {@link LemmaSample} holding the reference tokens and tags together
 *         with the predicted lemmas.
 */
@Override
protected LemmaSample processSample(LemmaSample reference) {
  String[] predicted = lemmatizer.lemmatize(reference.getTokens(), reference.getTags());
  String[] gold = reference.getLemmas();

  int position = 0;
  while (position < gold.length) {
    boolean match = gold[position].equals(predicted[position]);
    wordAccuracy.add(match ? 1 : 0);
    position++;
  }

  return new LemmaSample(reference.getTokens(), reference.getTags(), predicted);
}
posSample.getTags()); System.out.println(new LemmaSample(posSample.getSentence(), posSample.getTags(), lemmas).toString());
posSample.getTags()); System.out.println(new LemmaSample(posSample.getSentence(), posSample.getTags(), lemmas).toString());