/**
 * Wraps the given stream in a {@link URILocalnameFilter} and applies the
 * configured {@code maxLength} to it.
 *
 * @param input the token stream to decorate
 * @return the configured localname filter
 */
@Override
public TokenStream create(final TokenStream input) {
    final URILocalnameFilter localnameFilter = new URILocalnameFilter(input);
    localnameFilter.setMaxLength(maxLength);
    return localnameFilter;
}
/**
 * Builds a localname filter on top of the given stream.
 *
 * <p>Allocates a 256-character working buffer and registers the term and
 * position-increment attributes used while emitting tokens.
 *
 * @param input the upstream token stream
 */
public URILocalnameFilter(final TokenStream input) {
    super(input);
    // Working copy of the current term; initial capacity is 256 chars.
    this.termBuffer = CharBuffer.allocate(256);
    this.termAtt = this.addAttribute(CharTermAttribute.class);
    this.posIncrAtt = this.addAttribute(PositionIncrementAttribute.class);
}
protected void nextToken() { // There is still delimiters while (this.findNextToken()) { // SRN-66 & SRN-79: skip tokens with less than 3 characters if (end - start < 3) { start = end; continue; } this.updateToken(); _nTokens++; return; } if (_shouldReturnLocalname && startLocalname < termLength) { // return the full localname this.updateLocalnameToken(); _shouldReturnLocalname = false; return; } // No more delimiters, we have to return the full URI as last step this.updateFinalToken(); _isNormalising = false; }
protected boolean findNextToken() { // If localname is too large, do not tokenise it if (termLength - start > maxLength) { start++; // increment start pointer since it points to a delimiter end = termLength; return true; } while (start < termLength) { if (this.isDelim(termBuffer.get(start))) { start++; continue; } else { end = start; do { end++; } while (end < termLength && !this.isBreakPoint(termBuffer.get(end))); if (end < termLength) { // we found a breakpoint, we should return the fulle localname _shouldReturnLocalname = true; } return true; } } return false; }
t.reset(); final URILocalnameFilter filter = new URILocalnameFilter(t); filter.setMaxLength(MAX_LENGTH); assertTrue("token "+i+" exists", filter.incrementToken()); assertFalse("end of stream", filter.incrementToken()); filter.end(); filter.close();
@Override public final boolean incrementToken() throws java.io.IOException { // While we are normalising the URI if (_isNormalising) { this.posIncrAtt.setPositionIncrement(1); // reset the position increment this.nextToken(); return true; } // Otherwise, get next URI token and start normalisation if (input.incrementToken()) { termLength = termAtt.length(); this.updateBuffer(); _isNormalising = true; _shouldReturnLocalname = false; // we return the full localname only if a breakpoint is found _nTokens = 0; startLocalname = start = end = 0; startLocalname = start = this.findLocalname(); this.nextToken(); return true; } return false; }
/** * Given the type of URI normalisation, apply the right sequence of operations * and filters to the token stream. */ private TokenStream applyURINormalisation(TokenStream in) { switch (normalisationType) { case NONE: return new URITrailingSlashFilter(in); // here, trailing slash filter is after localname filtering, in order to // avoid filtering subdirectory instead of localname case LOCALNAME: in = new URILocalnameFilter(in); return new URITrailingSlashFilter(in); // here, trailing slash filter is before localname filtering, in order to // avoid trailing slash checking on every tokens generated by the // URI normalisation filter case FULL: in = new URITrailingSlashFilter(in); return new URINormalisationFilter(in); default: throw new EnumConstantNotPresentException(URINormalisation.class, normalisationType.toString()); } }