public static Token create(@Nullable final String lemma, @Nullable final String stem, @Nullable final String pos) { return lemma == null && stem == null && pos == null ? WILDCARD // : new Token(lemma, stem, pos); }
/**
 * Converts this entry into a {@link Lexeme}, or returns {@code null} when it has no text.
 *
 * <p>The text is split on runs of whitespace; each resulting word is lower-cased and turned into
 * a token created with that word as lemma and {@code null} stem and part-of-speech. Lower-casing
 * uses {@link java.util.Locale#ROOT} so the produced lemmas do not depend on the JVM default
 * locale (e.g. the Turkish dotless-i mapping).
 *
 * <p>NOTE(review): if the text starts with whitespace, {@code split} yields a leading empty
 * string, which becomes a token with an empty lemma - confirm whether text is trimmed upstream.
 *
 * @return the lexeme built from this entry, or {@code null} if the text is {@code null}
 */
@Nullable
Lexeme toLexeme() {
    if (this.text == null) {
        return null;
    }
    final List<Token> tokens = Lists.newArrayList();
    for (final String word : this.text.split("\\s+")) {
        // Locale.ROOT keeps the lemma identical across platforms and default locales.
        tokens.add(Token.create(word.toLowerCase(java.util.Locale.ROOT), null, null));
    }
    final String[] semantics = this.semantics.toArray(new String[this.semantics.size()]);
    return new Lexeme(this.id, tokens, this.aptitude, this.attention, this.pleasentness,
            this.polarity, this.sensitivity, semantics);
}
/**
 * Parses a lexeme of the requested class from its tab-separated textual form.
 *
 * <p>The first column is the lexeme id. The second column is expected to start with
 * {@code tokens=}: that many leading characters are skipped without being checked, and the
 * remainder is split on commas, each piece being handed to {@code Token.parse} together with the
 * id. Every further column becomes a property: {@code key=value} columns are split at the first
 * {@code '='}, while columns without {@code '='} are stored with the value {@code "true"}.
 *
 * @param lexemeClass the lexeme class to instantiate
 * @param string the tab-separated line to parse
 * @return the parsed lexeme, or {@code null} if the line has fewer than two columns
 */
@Nullable
public static <T extends Lexeme> T parse(final Class<T> lexemeClass, final String string) {
    final String[] columns = string.split("\t");
    if (columns.length < 2) {
        return null;
    }

    final String id = columns[0].trim();

    // Drop the "tokens=" prefix by length, then read the comma-separated token specs.
    final String tokenList = columns[1].substring("tokens=".length());
    final List<Token> tokens = Lists.newArrayList();
    for (final String tokenSpec : tokenList.split(",")) {
        tokens.add(Token.parse(tokenSpec.trim(), id));
    }

    final ImmutableMap.Builder<String, String> properties = ImmutableMap.builder();
    for (int i = 2; i < columns.length; ++i) {
        final String column = columns[i];
        final int separator = column.indexOf('=');
        final boolean flagOnly = separator < 0;
        final String key = (flagOnly ? column : column.substring(0, separator)).trim().intern();
        final String value = flagOnly ? "true" : column.substring(separator + 1).trim().intern();
        properties.put(key, value);
    }

    return create(lexemeClass, id, tokens, properties.build());
}
if (this.tokens.get(i).match(head)) { indexes.add(i); ++combinations;
// Exactly one of lemma/stem is set, selected by the stemmed flag; the other stays null.
final String lemma = stemmed ? null : word;
// NOTE(review): Stemming.stem receives null as its first argument - confirm what null selects there.
final String stem = stemmed ? Stemming.stem(null, word) : null;
final Token token = Token.create(lemma, stem, pos);
// Id layout: the word, then "_stemmed" when stemming is on, then "_" + lower-cased POS when a POS is given.
final String id = word + (stemmed ? "_stemmed" : "") + (pos == null ? "" : "_" + pos.toLowerCase());
/**
 * Creates a lexicon over the supplied lexemes, indexing them by id, by token lemma and by token
 * stem.
 *
 * <p>Lexemes are visited in their natural order. Each lexeme is registered under its id, and
 * once for every non-null lemma and every non-null stem among its tokens. Note that Guava's
 * {@code ImmutableMap.Builder} rejects duplicate keys when built, so ids must be unique.
 *
 * @param lexemes the lexemes to index
 */
public Lexicon(final Iterable<? extends T> lexemes) {
    final ImmutableMap.Builder<String, T> byId = ImmutableMap.builder();
    final ImmutableMultimap.Builder<String, T> byLemma = ImmutableMultimap.builder();
    final ImmutableMultimap.Builder<String, T> byStem = ImmutableMultimap.builder();

    for (final T lexeme : Ordering.natural().immutableSortedCopy(lexemes)) {
        byId.put(lexeme.getId(), lexeme);
        for (final Token token : lexeme.getTokens()) {
            final String lemma = token.getLemma();
            if (lemma != null) {
                byLemma.put(lemma, lexeme);
            }
            final String stem = token.getStem();
            if (stem != null) {
                byStem.put(stem, lexeme);
            }
        }
    }

    this.idIndex = byId.build();
    this.lemmaIndex = byLemma.build();
    this.stemIndex = byStem.build();
}
/**
 * Parses a token from its string form without supplying an alternative lemma.
 *
 * <p>Equivalent to calling the two-argument {@code parse} with {@code null} as second argument.
 *
 * @param string the token string to parse
 * @return the parsed token
 */
public static Token parse(final String string) {
    final String noAltLemma = null;
    return parse(string, noAltLemma);
}
/**
 * Writes the tab-separated textual form of this object to the supplied sink.
 *
 * <p>The output is the id, a tab, {@code tokens=} followed by the tokens separated by commas,
 * and then one {@code \tname=value} column per property, with property names in natural order.
 *
 * @param out the sink to append to
 * @return the same sink, to allow call chaining
 * @throws IOException if appending to the sink fails
 */
public final <T extends Appendable> T toString(final T out) throws IOException {
    out.append(this.id);
    out.append("\ttokens=");

    // Empty separator before the first token, a comma before every later one.
    String separator = "";
    for (final Token token : this.tokens) {
        out.append(separator);
        token.toString(out);
        separator = ",";
    }

    final Map<String, String> properties = getProperties();
    for (final String name : Ordering.natural().immutableSortedCopy(properties.keySet())) {
        out.append('\t');
        out.append(name);
        out.append('=');
        out.append(properties.get(name));
    }
    return out;
}
/**
 * Returns the textual form of this object, obtained by rendering it into a
 * {@link StringBuilder} through the {@code Appendable}-based {@code toString} overload.
 *
 * @return the rendered string
 */
@Override
public String toString() {
    try {
        return toString(new StringBuilder()).toString();
    } catch (final IOException ex) {
        // StringBuilder's append methods do not throw IOException, so getting here indicates a
        // programming error. AssertionError is still an Error (callers catching Error keep
        // working) but states the intent and carries a message plus the original cause.
        throw new AssertionError("Unexpected I/O failure while rendering to a StringBuilder", ex);
    }
}
/**
 * Parses a token from a {@code lemma|stem|pos} string.
 *
 * <p>Each field is trimmed. A lemma of {@code "."} is replaced by {@code altLemma}. After that,
 * any field equal to {@code "*"} is passed to {@code create} as {@code null}.
 *
 * @param string the pipe-separated token string
 * @param altLemma the lemma to use when the lemma field is {@code "."}, possibly {@code null}
 * @return the token produced by {@code create} for the resolved fields
 * @throws ArrayIndexOutOfBoundsException if the string splits into fewer than three fields
 */
public static Token parse(final String string, @Nullable String altLemma) {
    final String[] parts = string.split("\\|");
    final String rawLemma = parts[0].trim();
    final String rawStem = parts[1].trim();
    final String rawPos = parts[2].trim();

    // "." stands for the caller-provided alternative lemma.
    final String lemma = ".".equals(rawLemma) ? altLemma : rawLemma;

    final String lemmaOrNull = "*".equals(lemma) ? null : lemma;
    final String stemOrNull = "*".equals(rawStem) ? null : rawStem;
    final String posOrNull = "*".equals(rawPos) ? null : rawPos;
    return create(lemmaOrNull, stemOrNull, posOrNull);
}