@Override public boolean incrementToken() throws IOException { if (finiteStrings == null) { if (wasReset == false) { throw new IllegalStateException("reset() missing before incrementToken"); } // lazy init/consume Automaton automaton = toAutomaton(); // calls reset(), incrementToken() repeatedly, and end() on inputTokenStream finiteStrings = new LimitedFiniteStringsIterator(automaton, maxGraphExpansions); //note: would be nice to know the startOffset but toAutomaton doesn't capture it. We'll assume 0 endOffset = inputTokenStream.getAttribute(OffsetAttribute.class).endOffset(); } IntsRef string = finiteStrings.next(); if (string == null) { return false; } clearAttributes(); if (finiteStrings.size() > 1) { // if number of iterated strings so far is more than one... posIncrAtt.setPositionIncrement(0); // stacked } offsetAtt.setOffset(0, endOffset); Util.toBytesRef(string, bytesAtt.builder()); // now we have UTF-8 if (charTermAttribute != null) { charTermAttribute.setLength(0); charTermAttribute.append(bytesAtt.toUTF16()); } return true; }
public Set<IntsRef> toFiniteStrings(TokenStream stream) throws IOException { final TokenStreamToAutomaton ts2a = getTokenStreamToAutomaton(); Automaton automaton; try (TokenStream ts = stream) { automaton = toAutomaton(ts, ts2a); } LimitedFiniteStringsIterator finiteStrings = new LimitedFiniteStringsIterator(automaton, maxGraphExpansions); Set<IntsRef> set = new HashSet<>(); for (IntsRef string = finiteStrings.next(); string != null; string = finiteStrings.next()) { set.add(IntsRef.deepCopyOf(string)); } return Collections.unmodifiableSet(set); }
@Override public boolean incrementToken() throws IOException { clearAttributes(); if (finiteStrings == null) { Automaton automaton = toAutomaton(); finiteStrings = new LimitedFiniteStringsIterator(automaton, maxGraphExpansions); } IntsRef string = finiteStrings.next(); if (string == null) { return false; } Util.toBytesRef(string, bytesAtt.builder()); // now we have UTF-8 if (charTermAttribute != null) { charTermAttribute.setLength(0); charTermAttribute.append(bytesAtt.toUTF16()); } if (payload != null) { payloadAttr.setPayload(this.payload); } return true; }
public Set<IntsRef> toFiniteStrings(TokenStream stream) throws IOException { final TokenStreamToAutomaton ts2a = getTokenStreamToAutomaton(); Automaton automaton; try (TokenStream ts = stream) { automaton = toAutomaton(ts, ts2a); } LimitedFiniteStringsIterator finiteStrings = new LimitedFiniteStringsIterator(automaton, maxGraphExpansions); Set<IntsRef> set = new HashSet<>(); for (IntsRef string = finiteStrings.next(); string != null; string = finiteStrings.next()) { set.add(IntsRef.deepCopyOf(string)); } return Collections.unmodifiableSet(set); }
new LimitedFiniteStringsIterator(toAutomaton(surfaceForm, ts2a), maxGraphExpansions); for (IntsRef string; (string = finiteStrings.next()) != null; count++) { Util.toBytesRef(string, scratch); maxAnalyzedPathsForOneInput = Math.max(maxAnalyzedPathsForOneInput, finiteStrings.size());
public Set<IntsRef> toFiniteStrings(TokenStream stream) throws IOException { final TokenStreamToAutomaton ts2a = getTokenStreamToAutomaton(); Automaton automaton; try (TokenStream ts = stream) { automaton = toAutomaton(ts, ts2a); } LimitedFiniteStringsIterator finiteStrings = new LimitedFiniteStringsIterator(automaton, maxGraphExpansions); Set<IntsRef> set = new HashSet<>(); for (IntsRef string = finiteStrings.next(); string != null; string = finiteStrings.next()) { set.add(IntsRef.deepCopyOf(string)); } return Collections.unmodifiableSet(set); }
new LimitedFiniteStringsIterator(toAutomaton(surfaceForm, ts2a), maxGraphExpansions); for (IntsRef string; (string = finiteStrings.next()) != null; count++) { Util.toBytesRef(string, scratch); maxAnalyzedPathsForOneInput = Math.max(maxAnalyzedPathsForOneInput, finiteStrings.size());
public Set<IntsRef> toFiniteStrings(TokenStream stream) throws IOException { final TokenStreamToAutomaton ts2a = getTokenStreamToAutomaton(); Automaton automaton; try (TokenStream ts = stream) { automaton = toAutomaton(ts, ts2a); } LimitedFiniteStringsIterator finiteStrings = new LimitedFiniteStringsIterator(automaton, maxGraphExpansions); Set<IntsRef> set = new HashSet<>(); for (IntsRef string = finiteStrings.next(); string != null; string = finiteStrings.next()) { set.add(IntsRef.deepCopyOf(string)); } return Collections.unmodifiableSet(set); }
new LimitedFiniteStringsIterator(toAutomaton(surfaceForm, ts2a), maxGraphExpansions); for (IntsRef string; (string = finiteStrings.next()) != null; count++) { Util.toBytesRef(string, scratch); maxAnalyzedPathsForOneInput = Math.max(maxAnalyzedPathsForOneInput, finiteStrings.size());
public Set<IntsRef> toFiniteStrings(TokenStream stream) throws IOException { final TokenStreamToAutomaton ts2a = getTokenStreamToAutomaton(); Automaton automaton; try (TokenStream ts = stream) { automaton = toAutomaton(ts, ts2a); } LimitedFiniteStringsIterator finiteStrings = new LimitedFiniteStringsIterator(automaton, maxGraphExpansions); Set<IntsRef> set = new HashSet<>(); for (IntsRef string = finiteStrings.next(); string != null; string = finiteStrings.next()) { set.add(IntsRef.deepCopyOf(string)); } return Collections.unmodifiableSet(set); }
new LimitedFiniteStringsIterator(toAutomaton(surfaceForm, ts2a), maxGraphExpansions); for (IntsRef string; (string = finiteStrings.next()) != null; count++) { Util.toBytesRef(string, scratch); maxAnalyzedPathsForOneInput = Math.max(maxAnalyzedPathsForOneInput, finiteStrings.size());
new LimitedFiniteStringsIterator(toAutomaton(surfaceForm, ts2a), maxGraphExpansions); for (IntsRef string; (string = finiteStrings.next()) != null; count++) { Util.toBytesRef(string, scratch); maxAnalyzedPathsForOneInput = Math.max(maxAnalyzedPathsForOneInput, finiteStrings.size());
new LimitedFiniteStringsIterator(toAutomaton(surfaceForm, ts2a), maxGraphExpansions); for (IntsRef string; (string = finiteStrings.next()) != null; count++) { Util.toBytesRef(string, scratch); maxAnalyzedPathsForOneInput = Math.max(maxAnalyzedPathsForOneInput, finiteStrings.size());