/**
 * Creates an iterator over the grams of size {@code n} contained in {@code input}. Each gram is
 * represented as a [start index, length] pair pointing into the input string.
 * <p>
 * The returned iterator is a sliding view over the input string and is not backed by a list,
 * which makes it space efficient for large strings.
 *
 * @param input the string to split into grams, must not be null
 * @param n the gram size, must be a positive integer
 * @return a read only iterator over the resulting grams
 * @throws NullPointerException if {@code input} is null
 * @throws IllegalArgumentException if {@code n} is smaller than 1
 */
public GramSplitterIterator split(String input, int n) {
    // The null check comes first so that a null input is reported before an invalid gram size.
    if (null == input) {
        throw new NullPointerException("input cannot be null");
    }
    final boolean gramSizeIsPositive = n >= 1;
    if (!gramSizeIsPositive) {
        throw new IllegalArgumentException("n (gram size) cannot be smaller than 1, was " + n);
    }
    return new GramSplitterIterator(input, n, characterClasses);
}
int nonWordChar = indexOfNonWordChar(gram); if (nonWordChar == 0) { throw new RuntimeException("Programming error"); i += gram.length() + 1; isFirstAfterSeparator = true; return findNext();
/**
 * Convenience method which drains the remaining grams of this iterator and extracts each one
 * from the input as a string.
 *
 * @return an unmodifiable list holding the extracted gram strings, in iteration order
 */
public List<String> toExtractedList() {
    final List<String> extracted = new ArrayList<>();
    while (true) {
        if (!hasNext()) {
            break;
        }
        // Resolve the gram against the input this iterator was created for.
        Gram gram = next();
        extracted.add(gram.extractFrom(input));
    }
    return Collections.unmodifiableList(extracted);
}
}
/**
 * Reports whether another gram is available. When no gram is currently buffered, one is looked
 * up with {@code findNext()} and kept in {@code nextGram} so that a following {@link #next()}
 * call can return it.
 *
 * @return true if a gram is buffered or could be found, false otherwise
 */
@Override
public boolean hasNext() {
    // Only look ahead when nothing is buffered from an earlier call.
    if (nextGram == null) {
        nextGram = findNext();
    }
    return nextGram != null;
}
/**
 * Returns the next gram: the one buffered by a previous {@link #hasNext()} call if present,
 * otherwise the result of a fresh {@code findNext()} lookup. The buffer is cleared before
 * returning.
 *
 * @return the next gram
 * @throws NoSuchElementException if no further gram is available
 */
@Override
public Gram next() {
    final Gram result = (nextGram != null) ? nextGram : findNext();
    if (result == null) {
        throw new NoSuchElementException("No next gram at position " + i);
    }
    // Clear the buffer so the following call looks up a fresh gram.
    nextGram = null;
    return result;
}