/**
 * Constructs a token parser over the given HTML source.
 *
 * @param srcHtml
 *            the HTML source to parse.
 * @param readingPosition
 *            the offset within the HTML source at which parsing starts; used to
 *            position the internal {@link Caret}.
 */
public HtmlTokenParser(CharSequence srcHtml, int readingPosition) {
    this.srcHtml = srcHtml;
    this.caret = new Caret(srcHtml, readingPosition);
    // Bring the parser into its initial state before the first token is requested.
    resetState(); // NOPMD
}
/**
 * Configures the character encoding of the data passing through this stream.
 * The encoding is forwarded to the injector; without it the binary data cannot
 * be decoded / encoded.
 *
 * @param charsetName
 *            the name of the character encoding
 */
public void setEncoding(String charsetName) {
    injector.setCharacterEncoding(charsetName);
}
/**
 * Duplicates this caret. Equivalent to invoking the copy constructor with this
 * instance.
 *
 * @return a new caret created from this one via the copy constructor.
 */
public Caret copy() {
    Caret duplicate = new Caret(this);
    return duplicate;
}
/** * Tries to find an opening html tag. */ private void scanForHtmlTag() { if (CharSequenceUtils.checkEqualIgnoreCase(tokenParser.getTagType(), "html")) { if (tokenParser.getParsedTokenType() != Token.START_TAG) { abortInjectionPointSearch(); return; } status = Status.SCAN_FOR_HEAD_TAG; } else { // current token is not the html tag, we assume the document starts immediately with the // head status = Status.SCAN_FOR_HEAD_TAG; processToken(); } }
/**
 * Searches for the comment terminator {@code "-->"} starting at the current caret
 * position.
 * <p>
 * On success the status is set to {@code COMPLETE}. On failure the caret is moved
 * back to the offset it had when this method was entered.
 *
 * @return true if the terminator was found; false if it was not found before the
 *         end of the HTML source was reached.
 */
private boolean scanForCommentEnd() {
    final int startOffset = caret.getOffset();
    boolean terminatorFound = caret.walkAfterMatchCheckCase("-->");
    if (!terminatorFound) {
        // Restore the caret so that it is unchanged for the caller.
        caret.goTo(startOffset);
        return false;
    }
    status = Status.COMPLETE;
    return true;
}
/** * {@inheritDoc} */ @Override public String performInjection(CharSequence htmlData) { if (hasTerminated()) { return null; } if (leftOver == NO_LEFTOVER_CHARACTER_BYTES) { return super.performInjection(htmlData); } else { // we have some undecodeable bytes left: abort injection abortInjectionPointSearch(); return null; } }
/**
 * Advances the caret one position at a time until the text at the caret starts
 * with the given string. The comparison is case-sensitive.
 *
 * @param strToMatch
 *            the string to search for
 * @return true if the caret now points at a match; false if the end was reached
 *         without finding one
 */
public boolean walkToMatchCheckCase(String strToMatch) {
    for (; !endReached(); offset++) {
        if (startsWithCheckCase(strToMatch)) {
            return true;
        }
    }
    return false;
}
/**
 * Advances the caret one position at a time until the text at the caret starts
 * with the given string. The comparison is case-insensitive.
 *
 * @param strToMatch
 *            the string to search for
 * @return true if the caret now points at a match; false if the end was reached
 *         without finding one
 */
public boolean walkToMatchIgnoreCase(String strToMatch) {
    for (; !endReached(); offset++) {
        if (startsWithIgnoreCase(strToMatch)) {
            return true;
        }
    }
    return false;
}
/**
 * Tests whether the source text, read from the caret's current position, begins
 * with the given string. The comparison is case-sensitive.
 *
 * @param strToMatch
 *            the string to compare against
 * @return true if the text at the caret starts with {@code strToMatch}; false if
 *         it does not, or if {@code strToMatch} is longer than the remaining way
 *         to the end
 */
public boolean startsWithCheckCase(String strToMatch) {
    final int matchLength = strToMatch.length();
    // Only compare when the candidate fits into the remaining way to the end.
    return matchLength <= wayToEnd()
            && CharSequenceUtils.checkEqualCheckCase(src, offset, matchLength, strToMatch, 0, matchLength);
}
/**
 * Tests whether the source text, read from the caret's current position, begins
 * with the given string. The comparison is case-insensitive.
 *
 * @param strToMatch
 *            the string to compare against
 * @return true if the text at the caret starts with {@code strToMatch}; false if
 *         it does not, or if {@code strToMatch} is longer than the remaining way
 *         to the end
 */
public boolean startsWithIgnoreCase(String strToMatch) {
    final int matchLength = strToMatch.length();
    // Only compare when the candidate fits into the remaining way to the end.
    return matchLength <= wayToEnd()
            && CharSequenceUtils.checkEqualIgnoreCase(src, offset, matchLength, strToMatch, 0, matchLength);
}
/**
 * Repositions the caret at an absolute offset. The move is expressed as a
 * relative step from the current offset and delegated to {@code goN}.
 *
 * @param offset2
 *            the target offset, counted from the beginning of the text
 */
public void goTo(int offset2) {
    int delta = offset2 - offset;
    goN(delta);
}
/**
 * Wraps the given writer with a print writer that performs tag injection.
 *
 * @param originalWriter
 *            the writer being wrapped; it is also passed to the superclass
 *            constructor.
 * @param tagToInject
 *            the tag(s) to insert, handed to a new
 *            {@link StreamedHtmlScriptInjector}.
 */
public TagInjectionPrintWriter(PrintWriter originalWriter, String tagToInject) {
    super(originalWriter);
    injector = new StreamedHtmlScriptInjector(tagToInject);
    this.originalWriter = originalWriter;
}
/**
 * Decodes the given encoded character data and tries to inject the script tag
 * into it. This is a convenience overload which processes the whole array, i.e.
 * the range starting at index 0 with the full array length.
 *
 * @param encodedHtmlData
 *            the character data, encoded with the previously specified encoding
 * @return null if no injection was performed; otherwise a copy of the encoded
 *         input data with the script tag inserted.
 */
public byte[] performInjection(byte[] encodedHtmlData) {
    final int length = encodedHtmlData.length;
    return performInjection(encodedHtmlData, 0, length);
}
/**
 * Moves the caret forward to the given character and, if it was found, one
 * position past it. The comparison is case-sensitive.
 *
 * @param c
 *            the character to search for
 * @return true if the character was found; false if the end of the string was
 *         reached while searching
 */
public boolean walkAfterCharCheckCase(char c) {
    if (!walkToCharCheckCase(c)) {
        return false;
    }
    // Step over the matched character.
    offset++;
    return true;
}
/**
 * Passes the character sequence through the injector before appending it.
 * If the injector returns a non-null result, that result is written to the
 * wrapped writer; otherwise the unmodified sequence is appended to it.
 *
 * @param csq
 *            the character sequence to append
 * @return this writer
 */
@Override
public PrintWriter append(CharSequence csq) {
    final String injected = injector.performInjection(csq);
    if (injected != null) {
        originalWriter.write(injected);
    } else {
        originalWriter.append(csq);
    }
    return this;
}
/**
 * Compares two character sequences case-sensitively over their full lengths.
 * Delegates to the range-based overload using offset 0 and each sequence's
 * complete length.
 *
 * @param a
 *            the first sequence
 * @param b
 *            the second sequence
 * @return true, if both character sequences are exactly equal, including the case
 */
public static boolean checkEqualCheckCase(CharSequence a, CharSequence b) {
    final int lengthA = a.length();
    final int lengthB = b.length();
    return checkEqualCheckCase(a, 0, lengthA, b, 0, lengthB);
}
/** * {@inheritDoc} */ @Override protected void abortInjectionPointSearch() { super.abortInjectionPointSearch(); leftOver = NO_LEFTOVER_CHARACTER_BYTES; encodeBuffer = null; // NOPMD decodeBuffer = null; // NOPMD }
/** * Tries to find an opening body tag. Omitting both the head and the body tag is currently not * supported. */ private void scanForBodyTag() { if (CharSequenceUtils.checkEqualIgnoreCase(tokenParser.getTagType(), "body")) { if (tokenParser.getParsedTokenType() != Token.START_TAG) { abortInjectionPointSearch(); return; } // Perform injection after start of the head tag status = Status.INJECTION_POINT_FOUND; } else { // current token is not the head tag, we assume it must be the body tag (head is empty) abortInjectionPointSearch(); } }
/** * Tries to find an opening head tag. Omitting both the head and the body tag is currently not * supported. */ private void scanForHeadTag() { if (CharSequenceUtils.checkEqualIgnoreCase(tokenParser.getTagType(), "head")) { if (tokenParser.getParsedTokenType() != Token.START_TAG) { abortInjectionPointSearch(); return; } // Perform injection after start of the head tag status = Status.INJECTION_POINT_FOUND; } else { // current token is not the head tag, we assume it must be the body tag (head is empty) status = Status.SCAN_FOR_BODY_TAG; processToken(); } }
/** * Scans for an opening <?xml .. ?> declaration in case the html is delivered as XML. */ private void scanXmlDeclaration() { // Preamble checking based on the information on this page if (CharSequenceUtils.checkEqualIgnoreCase(tokenParser.getTagType(), "?xml")) { // ?> is treated by the parser as a stand-alone tag if (tokenParser.getParsedTokenType() != Token.STANDALONE_TAG) { abortInjectionPointSearch(); return; } // Xml header detected, now we require an html doctype to continue status = Status.SCAN_REQUIRED_XHTML_PREAMBLE; } else { // no xml tag found, therefore we are non-strict in requiring a doctype declaration status = Status.SCAN_HTML_PREAMBLE; processToken(); } }