public ProspectiveTagTypeIterator(final Source source, final int pos) { // returns empty iterator if pos out of range final ParseText parseText=source.getParseText(); cursor=root; int posIndex=0; try { // find deepest node that matches the text at pos: while (true) { final TagTypeRegister child=cursor.getChild(parseText.charAt(pos+(posIndex++))); if (child==null) break; cursor=child; } } catch (IndexOutOfBoundsException ex) {} // not avoiding this exception is expensive but only happens in the very rare circumstance that the end of file is encountered in the middle of a potential tag. // go back up until we reach a node that contains a list of tag types: while (cursor.tagTypes==null) if ((cursor=cursor.parent)==null) break; }
/**
 * Builds the character reference that starts at {@code begin}, if there is one.
 * <p>
 * The character at {@code begin} must be {@code '&'}. A following {@code '#'} selects a numeric
 * character reference; anything else is handed to the character entity reference constructor.
 *
 * @param source the source document.
 * @param begin the position of the candidate {@code '&'}.
 * @param unterminatedCharacterReferenceSettings settings passed on to the specific constructors.
 * @return the constructed reference, or {@code null} if the text at {@code begin} does not start with
 *         {@code '&'} or an {@link IndexOutOfBoundsException} occurs while examining it.
 */
static CharacterReference construct(final Source source, final int begin, final Config.UnterminatedCharacterReferenceSettings unterminatedCharacterReferenceSettings) {
	try {
		if (source.getParseText().charAt(begin) != '&') {
			return null;
		}
		final boolean numeric = source.getParseText().charAt(begin + 1) == '#';
		if (numeric) {
			return NumericCharacterReference.construct(source, begin, unterminatedCharacterReferenceSettings);
		}
		return CharacterEntityReference.construct(source, begin, unterminatedCharacterReferenceSettings.characterEntityReferenceMaxCodePoint);
	} catch (IndexOutOfBoundsException ex) {
		// Running off the end of the text (here or inside the delegated constructors) means no reference.
		return null;
	}
}
/**
 * Reports whether {@code pos} marks the end of a tag's attributes.
 * <p>
 * This is the case when the character at {@code pos} is {@code '>'} or the text {@code "/>"}
 * occurs at {@code pos}. Note that this implementation does not consult
 * {@code isClosingSlashIgnored}.
 *
 * @param source the source document.
 * @param pos the position to test.
 * @param isClosingSlashIgnored not used by this implementation.
 * @return {@code true} if the attributes end at {@code pos}.
 */
@Override
public boolean atEndOfAttributes(final Source source, final int pos, final boolean isClosingSlashIgnored) {
	final ParseText text = source.getParseText();
	if (text.charAt(pos) == '>') {
		return true;
	}
	return text.containsAt("/>", pos);
}
/**
 * Reports whether {@code pos} marks the end of a tag's attributes.
 * <p>
 * A {@code '>'} at {@code pos} always ends the attributes. The text {@code "/>"} at {@code pos}
 * ends them only when {@code isClosingSlashIgnored} is {@code false}.
 *
 * @param source the source document.
 * @param pos the position to test.
 * @param isClosingSlashIgnored when {@code true}, a {@code "/>"} at {@code pos} is not treated as the end.
 * @return {@code true} if the attributes end at {@code pos}.
 */
public boolean atEndOfAttributes(final Source source, final int pos, final boolean isClosingSlashIgnored) {
	final ParseText text = source.getParseText();
	if (text.charAt(pos) == '>') {
		return true;
	}
	if (isClosingSlashIgnored) {
		return false;
	}
	return text.containsAt("/>", pos);
}
}
private static boolean isXML(final Segment firstNonTextSegment) { if (firstNonTextSegment==null || !(firstNonTextSegment instanceof Tag)) return false; Tag tag=(Tag)firstNonTextSegment; if (tag.getTagType()==StartTagType.XML_DECLARATION) return true; // if document has a DOCTYPE declaration and it contains the text "xhtml", it is an XML document: if (tag.source.getParseText().indexOf("xhtml",tag.begin,tag.end)!=-1) return true; return false; }
/**
 * Finds the end of the tag by scanning forward for the first {@code '>'} that is not enclosed in
 * double quotes.
 *
 * @param source the source document.
 * @param pos the position at which scanning starts; the character here is always examined.
 * @return the position immediately after the terminating {@code '>'}, or {@code -1} if the end of
 *         the source is reached first.
 */
protected int getEnd(final Source source, int pos) {
	final ParseText text = source.getParseText();
	boolean quoted = false;
	while (true) {
		final char ch = text.charAt(pos);
		if (ch == '"') {
			// Every double quote toggles the quoted state.
			quoted = !quoted;
		} else if (ch == '>' && !quoted) {
			return pos + 1;
		}
		pos++;
		if (pos >= source.getEnd()) {
			return -1;
		}
	}
}
}
protected int getEnd(final Source source, final int pos) { // This method needs to be overridden because this tag type shares the same start delimiter as the downlevel hidden conditional comment. // The closing delimiter of the other tag type must not appear inside this tag. // Take the following example: // <!--[if IE]> ... <![endif]--> ... <!--[if !(IE 5)]><!--> ... <!--<![endif]--> // If the default implementation were used, then the parser would recognise the first tag as: // <!--[if IE]> ... <![endif]--> ... <!--[if !(IE 5)]><!--> final int delimiterBegin=source.getParseText().indexOf(MicrosoftConditionalCommentTagTypes.DOWNLEVEL_HIDDEN_IF.getClosingDelimiter(),pos); if (delimiterBegin==-1) return -1; if (source.getParseText().containsAt(getClosingDelimiter(),delimiterBegin)) return delimiterBegin+getClosingDelimiter().length(); // this is a downlevel hidden conditional comment, so fail this tag type silently without displaying a log message return -2; } }
static final Tag getPreviousTagUncached(final Source source, final int pos, final int breakAtPos) { // returns null if pos is out of range. try { final ParseText parseText=source.getParseText(); int begin=pos; do { begin=parseText.lastIndexOf('<',begin,breakAtPos); // this assumes that all tags start with '<' // parseText.lastIndexOf and indexOf return -1 if pos is out of range. if (begin==-1) return null; final Tag tag=getTagAt(source,begin,false); if (tag!=null && tag.includeInSearch()) return tag; } while ((begin-=1)>=0); } catch (IndexOutOfBoundsException ex) { throw new AssertionError("Unexpected internal exception"); } return null; }
/**
 * Marks a range of the source text so that the parser ignores it.
 * <p>
 * Further details are given in the documentation of the {@link Segment#ignoreWhenParsing()} method.
 *
 * @param begin the beginning character position in the source text.
 * @param end the end character position in the source text.
 * @throws IllegalStateException if a full sequential parse has already been performed.
 */
public void ignoreWhenParsing(final int begin, final int end) {
	if (wasFullSequentialParseCalled()) {
		throw new IllegalStateException("ignoreWhenParsing can not be used after a full sequential parse has been performed");
	}
	if (parseTextOutputDocument == null) {
		// First use: create the output document from the current parse text, then clear the parseText
		// field (presumably so that it is rebuilt from the output document later; confirm against getParseText()).
		final OutputDocument blankingDocument = new OutputDocument(getParseText());
		parseTextOutputDocument = blankingDocument;
		parseText = null;
	}
	parseTextOutputDocument.replaceWithSpaces(begin, end);
}
/**
 * Finds the first character reference at or after {@code pos}.
 * <p>
 * Each {@code '&'} from {@code pos} onwards is tried in turn until one yields a reference.
 *
 * @param source the source document.
 * @param pos the position from which to search forwards.
 * @param unterminatedCharacterReferenceSettings settings passed on to {@code construct}.
 * @return the first character reference found, or {@code null} if there is none.
 */
private static CharacterReference getNext(final Source source, int pos, final Config.UnterminatedCharacterReferenceSettings unterminatedCharacterReferenceSettings) {
	final ParseText parseText = source.getParseText();
	for (int ampPos = parseText.indexOf('&', pos); ampPos != -1; ampPos = parseText.indexOf('&', ampPos + 1)) {
		final CharacterReference candidate = construct(source, ampPos, unterminatedCharacterReferenceSettings);
		if (candidate != null) {
			return candidate;
		}
	}
	return null;
}
/**
 * Constructs a start tag with an empty name spanning from {@code pos} to just after the next
 * {@code '>'}, and logs an error noting that its content matches no registered start tag type.
 *
 * @param source the source document.
 * @param pos the position at which the possible start tag begins.
 * @return the constructed tag, or {@code null} if no {@code '>'} is found after {@code pos}.
 */
protected Tag constructTagAt(final Source source, final int pos) {
	final int gtPos = source.getParseText().indexOf('>', pos + 1);
	if (gtPos == -1) {
		return null;
	}
	final Tag tag = constructStartTag(source, pos, gtPos + 1, "", null);
	if (source.logger.isErrorEnabled()) {
		// The message is only assembled when error logging is enabled.
		final String message = source.getRowColumnVector(tag.getBegin())
			.appendTo(new StringBuilder(200).append("Encountered possible StartTag at "))
			.append(" whose content does not match a registered StartTagType")
			.toString();
		source.logger.error(message);
	}
	return tag;
}
}
/**
 * Locates the {@linkplain Tag#getEnd() end} of a tag of this type, searching from the given position in the given source document.
 * <br />(<a href="TagType.html#ImplementationAssistance">implementation assistance</a> method)
 * <p>
 * The default behaviour is to look for the first occurrence of the
 * {@linkplain #getClosingDelimiter() closing delimiter} from the specified position onwards and to
 * return the position immediately following it.
 * <p>
 * The value <code>-1</code> is returned when no closing delimiter is found.
 *
 * @param source  the {@link Source} document.
 * @param pos  the position in the source document.
 * @return the {@linkplain Tag#getEnd() end} of a tag of this type, or <code>-1</code> if the end of the tag can not be found.
 */
protected int getEnd(final Source source, final int pos) {
	final int closerPos = source.getParseText().indexOf(getClosingDelimiter(), pos);
	if (closerPos == -1) {
		return -1;
	}
	return closerPos + getClosingDelimiter().length();
}
}
/**
 * Finds the last character reference that starts at or before {@code pos}.
 * <p>
 * Each {@code '&'} from {@code pos} backwards is tried in turn until one yields a reference.
 *
 * @param source the source document.
 * @param pos the position from which to search backwards.
 * @param unterminatedCharacterReferenceSettings settings passed on to {@code construct}.
 * @return the nearest preceding character reference, or {@code null} if there is none.
 */
private static CharacterReference getPrevious(final Source source, int pos, final Config.UnterminatedCharacterReferenceSettings unterminatedCharacterReferenceSettings) {
	final ParseText parseText = source.getParseText();
	for (int ampPos = parseText.lastIndexOf('&', pos); ampPos != -1; ampPos = parseText.lastIndexOf('&', ampPos - 1)) {
		final CharacterReference candidate = construct(source, ampPos, unterminatedCharacterReferenceSettings);
		if (candidate != null) {
			return candidate;
		}
	}
	return null;
}
protected Tag constructTagAt(final Source source, final int pos) { final ParseText parseText=source.getParseText(); final int nameBegin=pos+getStartDelimiter().length(); final int nameEnd=parseText.indexOf(getClosingDelimiter(),nameBegin); final String name=source.getName(nameBegin,nameEnd); // throws IndexOutOfBoundsException if nameEnd==-1 final EndTag endTag=constructEndTag(source,pos,nameEnd+getClosingDelimiter().length(),name); if (source.logger.isErrorEnabled()) source.logger.error(source.getRowColumnVector(pos).appendTo(new StringBuilder(200).append("Encountered possible EndTag at ")).append(" whose content does not match a registered EndTagType").toString()); return endTag; } }
static final Tag getNextTagUncached(final Source source, final int pos, final int breakAtPos) { // returns null if pos is out of range. try { final ParseText parseText=source.getParseText(); int begin=pos; do { begin=parseText.indexOf('<',begin,breakAtPos); // this assumes that all tags start with '<' // parseText.lastIndexOf and indexOf return -1 if pos is out of range. if (begin==-1) return null; final Tag tag=getTagAt(source,begin,false); if (tag!=null && tag.includeInSearch()) return tag; } while ((begin+=1)<source.end); } catch (IndexOutOfBoundsException ex) { // this should only happen when the end of file is reached in the middle of a tag. // we don't have to do anything to handle it as there are no more tags anyway. } return null; }
static final Tag getNextTagUncached(final Source source, final int pos, final TagType tagType, final int breakAtPos) { // returns null if pos is out of range. if (tagType==null) return getNextTagUncached(source,pos,breakAtPos); final String startDelimiter=tagType.getStartDelimiter(); try { final ParseText parseText=source.getParseText(); int begin=pos; do { begin=parseText.indexOf(startDelimiter,begin,breakAtPos); // parseText.lastIndexOf and indexOf return -1 if pos is out of range. if (begin==-1) return null; final Tag tag=getTagAt(source,begin,false); if (tag!=null && tag.getTagType()==tagType) return tag; } while ((begin+=1)<source.end); } catch (IndexOutOfBoundsException ex) { // this should only happen when the end of file is reached in the middle of a tag. // we don't have to do anything to handle it as there are no more tags anyway. } return null; }
static final Tag getPreviousTagUncached(final Source source, final int pos, final TagType tagType, final int breakAtPos) { // returns null if pos is out of range. if (tagType==null) return getPreviousTagUncached(source,pos,breakAtPos); final String startDelimiter=tagType.getStartDelimiter(); try { final ParseText parseText=source.getParseText(); int begin=pos; do { begin=parseText.lastIndexOf(startDelimiter,begin,breakAtPos); // parseText.lastIndexOf and indexOf return -1 if pos is out of range. if (begin==-1) return null; final Tag tag=getTagAt(source,begin,false); if (tag!=null && tag.getTagType()==tagType) return tag; } while ((begin-=1)>=0); } catch (IndexOutOfBoundsException ex) { // this should never happen during a get previous operation so rethrow it: throw ex; } return null; }
public static List<Segment> getStyleURISegments(final Segment segment) { if (segment==null || segment.length()==0) return Collections.emptyList(); if (segment.getFirstStartTag()==null) { // no start tags in this segment, assume the segment is a style attribute value int urlDelimiterStartPos=segment.getSource().getParseText().indexOf("url(",segment.getBegin(),segment.getEnd()); if (urlDelimiterStartPos==-1) return Collections.emptyList(); return addURLSegmentsFromCSS(new ArrayList<Segment>(),new Segment(segment.getSource(),urlDelimiterStartPos,segment.getEnd())); } List<Segment> uriSegments=new ArrayList<Segment>(); for (StartTag startTag : segment.getAllStartTags("style",null)) { addURLSegmentsFromCSS(uriSegments,startTag.getAttributes().get("style").getValueSegment()); } for (Element element : segment.getAllElements(HTMLElementName.STYLE)) { addURLSegmentsFromCSS(uriSegments,element.getContent()); } Collections.sort(uriSegments); return uriSegments; }
private static Appendable appendDecode(final Appendable appendable, final Segment segment, final int searchBegin, final boolean insideAttributeValue, final boolean convertNonBreakingSpaces) throws IOException { final Config.UnterminatedCharacterReferenceSettings unterminatedCharacterReferenceSettings=Config.CurrentCompatibilityMode.getUnterminatedCharacterReferenceSettings(insideAttributeValue); final Source source=segment.source; final ParseText parseText=source.getParseText(); final int end=segment.getEnd(); int begin=segment.getBegin(); int pos=parseText.indexOf('&',begin+searchBegin,end); while (pos!=-1) { final CharacterReference characterReference=CharacterReference.construct(source,pos,unterminatedCharacterReferenceSettings); if (characterReference!=null) { appendable.append(source.substring(begin,pos)); // Don't use appendable.append(source,begin,pos) as it checks source.length() which may throw an exception when using StreamedSource. characterReference.appendCharTo(appendable,convertNonBreakingSpaces); begin=characterReference.getEnd(); pos=parseText.indexOf('&',begin,end); } else { pos=parseText.indexOf('&',pos+1,end); } } appendable.append(source.substring(begin,end)); return appendable; }
/**
 * Scans a segment of CSS for {@code url(...)} constructs and adds a segment for each enclosed URI
 * to the given list.
 * <p>
 * White space after the opening bracket and before the closing bracket is excluded, as is one
 * quote character at either end of the URI.
 *
 * @param uriSegments the list to which the URI segments are added.
 * @param cssSegment the segment containing the CSS text.
 * @return the {@code uriSegments} argument.
 */
private static List<Segment> addURLSegmentsFromCSS(final List<Segment> uriSegments, final Segment cssSegment) {
	final Source source = cssSegment.getSource();
	final ParseText parseText = source.getParseText();
	final int limit = cssSegment.getEnd();
	int pos = parseText.indexOf("url(", cssSegment.getBegin(), limit);
	while (pos != -1) {
		// Step over "url(" and any white space following it.
		pos += 4;
		while (pos < limit && Segment.isWhiteSpace(parseText.charAt(pos))) {
			pos++;
		}
		if (pos >= limit) {
			break;
		}
		// Skip one opening quote, if present.
		if (isQuote(parseText.charAt(pos))) {
			pos++;
			if (pos >= limit) {
				break;
			}
		}
		final int uriBegin = pos;
		final int closeParenPos = parseText.indexOf(')', uriBegin, limit);
		if (closeParenPos == -1) {
			break;
		}
		// Trim trailing white space and one closing quote in front of the bracket.
		int uriEnd = closeParenPos;
		while (Segment.isWhiteSpace(parseText.charAt(uriEnd - 1))) {
			uriEnd--;
		}
		if (isQuote(parseText.charAt(uriEnd - 1))) {
			uriEnd--;
		}
		// NOTE(review): an empty url() ends the whole scan rather than just being skipped, so any
		// later url(...) in the same CSS is not reported. Confirm this is intended.
		if (uriEnd <= uriBegin) {
			break;
		}
		uriSegments.add(new Segment(source, uriBegin, uriEnd));
		pos = parseText.indexOf("url(", closeParenPos, limit);
	}
	return uriSegments;
}