/**
 * Appends a single character to the pending attribute name.
 * The character is converted to a String and handed to the String overload.
 *
 * @param append the character to add
 */
final void appendAttributeName(char append) {
    final String asString = String.valueOf(append);
    appendAttributeName(asString);
}
/**
 * Appends a single character to the tag name.
 * The character is converted to a String and handed to the String overload.
 *
 * @param append the character to add
 */
final void appendTagName(char append) {
    final String asString = String.valueOf(append);
    appendTagName(asString);
}
/**
 * Appends a run of characters to the pending attribute value.
 *
 * @param append the characters to add
 */
final void appendAttributeValue(char[] append) {
    // must run before touching the buffer; see ensureAttributeValue()
    ensureAttributeValue();

    pendingAttributeValue.append(append);
}
case nullChar: t.error(this); t.tagPending.appendAttributeValue(replacementChar); t.transition(AttributeValue_unquoted); break; case '`': t.error(this); t.tagPending.appendAttributeValue(c); t.transition(AttributeValue_unquoted); break;
void read(Tokeniser t, CharacterReader r) { String value = r.consumeToAnySorted(attributeValueUnquoted); if (value.length() > 0) t.tagPending.appendAttributeValue(value); int[] ref = t.consumeCharacterReference('>', true); if (ref != null) t.tagPending.appendAttributeValue(ref); else t.tagPending.appendAttributeValue('&'); break; case '>': case nullChar: t.error(this); t.tagPending.appendAttributeValue(replacementChar); break; case eof: case '`': t.error(this); t.tagPending.appendAttributeValue(c); break; default: // hit end of buffer in first read, still in attribute t.tagPending.appendAttributeValue(c);
if (r.matchesLetter()) { String name = r.consumeLetterSequence(); t.tagPending.appendTagName(name); t.dataBuffer.append(name); return;
case nullChar: t.error(this); t.tagPending.newAttribute(); r.unconsume(); t.transition(AttributeName); case '=': t.error(this); t.tagPending.newAttribute(); t.tagPending.appendAttributeName(c); t.transition(AttributeName); break; default: // A-Z, anything else t.tagPending.newAttribute(); r.unconsume(); t.transition(AttributeName);
void read(Tokeniser t, CharacterReader r) { String name = r.consumeToAnySorted(attributeNameCharsSorted); t.tagPending.appendAttributeName(name); case nullChar: t.error(this); t.tagPending.appendAttributeName(replacementChar); break; case eof: case '<': t.error(this); t.tagPending.appendAttributeName(c); break; default: // buffer underrun t.tagPending.appendAttributeName(c);
void read(Tokeniser t, CharacterReader r) { if (r.matchesLetter()) { String name = r.consumeLetterSequence(); t.tagPending.appendTagName(name); t.dataBuffer.append(name); return;
case nullChar: t.error(this); t.tagPending.appendAttributeName(replacementChar); t.transition(AttributeName); break; case '<': t.error(this); t.tagPending.newAttribute(); t.tagPending.appendAttributeName(c); t.transition(AttributeName); break; default: // A-Z, anything else t.tagPending.newAttribute(); r.unconsume(); t.transition(AttributeName);
t.tagPending.appendTagName(tagName); break; case nullChar: // replacement t.tagPending.appendTagName(replacementStr); break; case eof: // should emit pending tag? break; default: // buffer underrun t.tagPending.appendTagName(c);
void read(Tokeniser t, CharacterReader r) { String value = r.consumeToAny(attributeDoubleValueCharsSorted); if (value.length() > 0) t.tagPending.appendAttributeValue(value); else t.tagPending.setEmptyAttributeValue(); int[] ref = t.consumeCharacterReference('"', true); if (ref != null) t.tagPending.appendAttributeValue(ref); else t.tagPending.appendAttributeValue('&'); break; case nullChar: t.error(this); t.tagPending.appendAttributeValue(replacementChar); break; case eof: break; default: // hit end of buffer in first read, still in attribute t.tagPending.appendAttributeValue(c);
void read(Tokeniser t, CharacterReader r) { String value = r.consumeToAny(attributeSingleValueCharsSorted); if (value.length() > 0) t.tagPending.appendAttributeValue(value); else t.tagPending.setEmptyAttributeValue(); int[] ref = t.consumeCharacterReference('\'', true); if (ref != null) t.tagPending.appendAttributeValue(ref); else t.tagPending.appendAttributeValue('&'); break; case nullChar: t.error(this); t.tagPending.appendAttributeValue(replacementChar); break; case eof: break; default: // hit end of buffer in first read, still in attribute t.tagPending.appendAttributeValue(c);
final void newAttribute() { if (attributes == null) attributes = new Attributes(); if (pendingAttributeName != null) { // the tokeniser has skipped whitespace control chars, but trimming could collapse to empty for other control codes, so verify here pendingAttributeName = pendingAttributeName.trim(); if (pendingAttributeName.length() > 0) { String value; if (hasPendingAttributeValue) value = pendingAttributeValue.length() > 0 ? pendingAttributeValue.toString() : pendingAttributeValueS; else if (hasEmptyAttributeValue) value = ""; else value = null; attributes.put(pendingAttributeName, value); } } pendingAttributeName = null; hasEmptyAttributeValue = false; hasPendingAttributeValue = false; reset(pendingAttributeValue); pendingAttributeValueS = null; }
void read(Tokeniser t, CharacterReader r) { if (r.matches('/')) { t.createTempBuffer(); t.advanceTransition(RCDATAEndTagOpen); } else if (r.matchesLetter() && t.appropriateEndTagName() != null && !r.containsIgnoreCase("</" + t.appropriateEndTagName())) { // diverge from spec: got a start tag, but there's no appropriate end tag (</title>), so rather than // consuming to EOF; break out here t.tagPending = t.createTagPending(false).name(t.appropriateEndTagName()); t.emitTagPending(); r.unconsume(); // undo "<" t.transition(Data); } else { t.emit("<"); t.transition(Rcdata); } } },
Element insert(Token.StartTag startTag) { // handle empty unknown tags // when the spec expects an empty tag, will directly hit insertEmpty, so won't generate this fake end tag. if (startTag.isSelfClosing()) { Element el = insertEmpty(startTag); stack.add(el); tokeniser.transition(TokeniserState.Data); // handles <script />, otherwise needs breakout steps from script data tokeniser.emit(emptyEnd.reset().name(el.tagName())); // ensure we get out of whatever state we are in. emitted for yielded processing return el; } Element el = new Element(Tag.valueOf(startTag.name(), settings), baseUri, settings.normalizeAttributes(startTag.attributes)); insert(el); return el; }
/**
 * Tests whether the pending tag's name matches the last start tag.
 *
 * @return {@code true} if {@code lastStartTag} is set and equals the pending tag's name,
 *         ignoring case; {@code false} otherwise
 */
boolean isAppropriateEndTagToken() {
    if (lastStartTag == null) {
        return false;
    }
    return tagPending.name().equalsIgnoreCase(lastStartTag);
}
/**
 * Finalises the pending tag and then emits it.
 */
void emitTagPending() {
    // finalise first so the tag is complete before it is emitted
    tagPending.finaliseTag();

    emit(tagPending);
}
/**
 * Clears this tag's state: names, pending attribute data, flags and attributes.
 *
 * @return this tag, to allow chaining
 */
@Override
Tag reset() {
    // tag identity
    tagName = null;
    normalName = null;
    selfClosing = false;
    attributes = null;

    // attribute currently being accumulated
    pendingAttributeName = null;
    pendingAttributeValueS = null;
    hasEmptyAttributeValue = false;
    hasPendingAttributeValue = false;
    reset(pendingAttributeValue);

    return this;
}
final void finaliseTag() { // finalises for emit if (pendingAttributeName != null) { // todo: check if attribute name exists; if so, drop and error newAttribute(); } }