/**
 * Parses one tab-separated line into a lexeme of the requested class.
 *
 * <p>Expected layout: {@code id <TAB> tokens=t1,t2,... [<TAB> name[=value]]...}. The first field
 * is the lexeme id, the second field carries the comma-separated token specifications, and
 * every further field is a property. A property field without {@code '='} is stored with the
 * value {@code "true"}.
 *
 * @param lexemeClass the concrete lexeme class to instantiate
 * @param string the line to parse
 * @return the parsed lexeme, or {@code null} if the line has fewer than two tab-separated fields
 * @throws IllegalArgumentException if the second field does not start with {@code tokens=}
 */
@Nullable
public static <T extends Lexeme> T parse(final Class<T> lexemeClass, final String string) {
    final String[] fields = string.split("\t");
    if (fields.length < 2) {
        return null;
    }

    final String id = fields[0].trim();

    // The serialized form always writes the token list as "tokens=...". Check the prefix
    // instead of blindly cutting off its length, which either failed with an index error on a
    // short field or silently produced garbage on a differently-labelled one.
    final String tokensPrefix = "tokens=";
    final String tokensField = fields[1].trim();
    if (!tokensField.startsWith(tokensPrefix)) {
        throw new IllegalArgumentException("Expected second field to start with '"
                + tokensPrefix + "' but found '" + fields[1] + "' in line: " + string);
    }

    final List<Token> tokens = Lists.newArrayList();
    for (final String tokenStr : tokensField.substring(tokensPrefix.length()).split(",")) {
        tokens.add(Token.parse(tokenStr.trim(), id));
    }

    final ImmutableMap.Builder<String, String> builder = ImmutableMap.builder();
    for (int i = 2; i < fields.length; ++i) {
        final String fieldStr = fields[i];
        final int index = fieldStr.indexOf('=');
        if (index < 0) {
            // Bare property name: treated as a flag whose value is "true".
            final String key = fieldStr.trim().intern();
            final String value = "true";
            builder.put(key, value);
        } else {
            final String key = fieldStr.substring(0, index).trim().intern();
            final String value = fieldStr.substring(index + 1).trim().intern();
            builder.put(key, value);
        }
    }

    return create(lexemeClass, id, tokens, builder.build());
}
/**
 * Writes the textual form of this lexeme to the given sink.
 *
 * <p>The output is the id, then a tab and {@code tokens=} followed by the comma-separated
 * tokens, then one {@code <TAB>name=value} entry per property, with property names emitted in
 * their natural order.
 *
 * @param out the sink to append to
 * @return the same sink that was passed in
 * @throws IOException if appending to the sink fails
 */
public final <T extends Appendable> T toString(final T out) throws IOException {
    out.append(this.id);
    out.append("\ttokens=");
    for (int index = 0; index < this.tokens.size(); ++index) {
        // No separator before the first token, a comma before every other one.
        final String separator;
        if (index == 0) {
            separator = "";
        } else {
            separator = ",";
        }
        out.append(separator);
        this.tokens.get(index).toString(out);
    }

    final Map<String, String> props = getProperties();
    for (final String key : Ordering.natural().immutableSortedCopy(props.keySet())) {
        final String value = props.get(key);
        out.append('\t');
        out.append(key);
        out.append('=');
        out.append(value);
    }
    return out;
}
/**
 * Finds, for each of the given terms, the lexemes of this lexicon that match it.
 *
 * <p>For every term, candidate lexemes are looked up in the lemma index (by the term lemma) and
 * in the stem index (by the stem computed from that lemma). Each distinct candidate is then
 * checked with {@code match(document, termSet, term)} and kept only if that check succeeds.
 *
 * @param document the document the terms belong to; must not be {@code null}
 * @param terms the terms to match; they are copied into a set before processing
 * @return a multimap from each term to the lexemes that matched it
 */
public Multimap<Term, T> match(final KAFDocument document, final Iterable<Term> terms) {
    Preconditions.checkNotNull(document);
    final Set<Term> termSet = ImmutableSet.copyOf(terms);
    final Multimap<Term, T> matches = HashMultimap.create();

    for (final Term term : termSet) {
        final String lemma = term.getLemma();
        // NOTE(review): the first argument to stem() is passed as null; its meaning is not
        // visible here - confirm against Stemming.
        final String stem = Stemming.stem(null, lemma);

        // Copy into a set so a lexeme reachable through both indexes is tested only once.
        final Set<T> candidates = ImmutableSet.copyOf(
                Iterables.concat(this.lemmaIndex.get(lemma), this.stemIndex.get(stem)));

        for (final T candidate : candidates) {
            if (!candidate.match(document, termSet, term)) {
                continue;
            }
            matches.put(term, candidate);
        }
    }
    return matches;
}
final Term child = deps.get(i).getTo(); if (Iterables.contains(terms, child)) { childSolutions[i] = matchRecursive(document, terms, child); combinations *= childSolutions[i] == null ? 1 : childSolutions[i].length + 1;
/**
 * Builds a lexicon over the given lexemes and indexes them by id, token lemma and token stem.
 *
 * <p>The lexemes are processed in their natural order. Every lexeme is registered under its id;
 * for each of its tokens it is also registered under the token lemma and the token stem
 * whenever those are non-null.
 *
 * <p>NOTE(review): Guava's {@code ImmutableMap.Builder} is documented to reject duplicate keys
 * at build time, so lexeme ids are presumably expected to be unique - confirm.
 *
 * @param lexemes the lexemes to include in the lexicon
 */
public Lexicon(final Iterable<? extends T> lexemes) {
    final ImmutableMap.Builder<String, T> byId = ImmutableMap.builder();
    final ImmutableMultimap.Builder<String, T> byLemma = ImmutableMultimap.builder();
    final ImmutableMultimap.Builder<String, T> byStem = ImmutableMultimap.builder();

    for (final T lexeme : Ordering.natural().immutableSortedCopy(lexemes)) {
        byId.put(lexeme.getId(), lexeme);
        for (final Token token : lexeme.getTokens()) {
            final String lemma = token.getLemma();
            if (lemma != null) {
                byLemma.put(lemma, lexeme);
            }
            final String stem = token.getStem();
            if (stem != null) {
                byStem.put(stem, lexeme);
            }
        }
    }

    this.idIndex = byId.build();
    this.lemmaIndex = byLemma.build();
    this.stemIndex = byStem.build();
}
/**
 * Tells whether this lexeme matches the given terms when anchored at the given head term.
 *
 * <p>Candidate solutions come from {@code matchRecursive}. Each solution is an array indexed
 * like this lexeme's tokens; the lexeme matches as soon as one solution has a non-null entry
 * for every token index.
 *
 * @param document the document being matched
 * @param terms the terms that may take part in the match
 * @param head the term the match is anchored at
 * @return {@code true} if at least one solution covers every token, {@code false} otherwise
 *         (including when no solutions are returned)
 */
public final boolean match(final KAFDocument document, final Iterable<Term> terms,
        final Term head) {
    final Term[][] candidates = matchRecursive(document, terms, head);
    if (candidates == null) {
        return false;
    }
    for (final Term[] candidate : candidates) {
        // Stop scanning a candidate at its first unfilled slot.
        boolean complete = true;
        for (int slot = 0; slot < this.tokens.size() && complete; ++slot) {
            complete = candidate[slot] != null;
        }
        if (complete) {
            return true;
        }
    }
    return false;
}
/**
 * Reads a lexicon from the given reader, parsing one lexeme per line.
 *
 * <p>Each line is handed to {@code Lexeme.parse}; lines for which it returns {@code null} are
 * skipped. The reader is wrapped in a {@link BufferedReader} unless it already is one, and it
 * is not closed by this method.
 *
 * @param lexiconClass the concrete lexicon class to instantiate
 * @param lexemeClass the concrete lexeme class to parse each line into
 * @param reader the source of the lexicon lines
 * @return the lexicon built from all successfully parsed lexemes
 * @throws IOException if reading from the reader fails
 */
public static <T extends Lexeme, L extends Lexicon<T>> L readFrom(final Class<L> lexiconClass,
        final Class<T> lexemeClass, final Reader reader) throws IOException {
    final BufferedReader in;
    if (reader instanceof BufferedReader) {
        in = (BufferedReader) reader;
    } else {
        in = new BufferedReader(reader);
    }

    final List<T> lexemes = Lists.newArrayList();
    for (String line = in.readLine(); line != null; line = in.readLine()) {
        final T lexeme = Lexeme.parse(lexemeClass, line);
        if (lexeme != null) {
            lexemes.add(lexeme);
        }
    }
    return create(lexiconClass, lexemes);
}
/**
 * Returns the textual form of this object, as produced by {@code toString(Appendable)} writing
 * into a fresh {@link StringBuilder}.
 *
 * @return the string representation
 */
@Override
public final String toString() {
    try {
        return toString(new StringBuilder()).toString();
    } catch (final IOException ex) {
        // A StringBuilder sink does not perform I/O, so reaching this point indicates a
        // programming error. Signal it with AssertionError (still an Error subclass, so
        // existing callers are unaffected) rather than a raw java.lang.Error; the IOException
        // is retained as the cause.
        throw new AssertionError(ex);
    }
}
/**
 * Tells whether the given object is a lexeme held by this lexicon.
 *
 * <p>Membership is decided by reference identity: the object must be a {@code Lexeme}, and the
 * entry stored in the id index under its id must be that very same instance.
 *
 * @param object the object to look for
 * @return {@code true} if the object is the lexeme instance registered under its own id
 */
@Override
public boolean contains(final Object object) {
    if (!(object instanceof Lexeme)) {
        return false;
    }
    final Lexeme candidate = (Lexeme) object;
    final Object registered = this.idIndex.get(candidate.getId());
    return registered == candidate;
}
/**
 * Writes every lexeme of this lexicon to the given sink, one per line.
 *
 * <p>Lexemes are emitted in the iteration order of the id index; each one is written with its
 * own {@code toString(Appendable)} and followed by a newline character.
 *
 * @param out the sink to append to
 * @return the same sink that was passed in
 * @throws IOException if appending to the sink fails
 */
public <A extends Appendable> A writeTo(final A out) throws IOException {
    for (final Lexeme entry : this.idIndex.values()) {
        entry.toString(out);
        out.append('\n');
    }
    return out;
}