private static void readEndTag(Tokeniser t, CharacterReader r, TokeniserState a, TokeniserState b) { if (r.matchesLetter()) { t.createTagPending(false); t.transition(a); } else { t.emit("</"); t.transition(b); } }
private void anythingElse(Tokeniser t, CharacterReader r) { t.emit("</" + t.dataBuffer.toString()); r.unconsume(); t.transition(Rcdata); } },
void read(Tokeniser t, CharacterReader r) { char c = r.consume(); switch (c) { case '>': t.emitDoctypePending(); t.transition(Data); break; case eof: t.emitDoctypePending(); t.transition(Data); break; default: // ignore char break; } } },
void read(Tokeniser t, CharacterReader r) { switch (r.consume()) { case '/': t.createTempBuffer(); t.transition(ScriptDataEndTagOpen); break; case '!': t.emit("<!"); t.transition(ScriptDataEscapeStart); break; default: t.emit("<"); r.unconsume(); t.transition(ScriptData); } } },
void read(Tokeniser t, CharacterReader r) { char c = r.consume(); switch (c) { case '>': t.tagPending.selfClosing = true; t.emitTagPending(); t.transition(Data); break; case eof: t.eofError(this); t.transition(Data); break; default: t.error(this); r.unconsume(); t.transition(BeforeAttributeName); } } },
private static void handleRcData(Token.StartTag startTag, HtmlTreeBuilder tb) { tb.tokeniser.transition(TokeniserState.Rcdata); tb.markInsertionMode(); tb.transition(Text); tb.insert(startTag); }
void read(Tokeniser t, CharacterReader r) { if (r.matches('-')) { t.emit('-'); t.advanceTransition(ScriptDataEscapedDashDash); } else { t.transition(ScriptData); } } },
void read(Tokeniser t, CharacterReader r) { String data = r.consumeTo("]]>"); t.dataBuffer.append(data); if (r.matchConsume("]]>") || r.isEmpty()) { t.emit(new Token.CData(t.dataBuffer.toString())); t.transition(Data); }// otherwise, buffer underrun, stay in data section } };
void read(Tokeniser t, CharacterReader r) { if (r.matches('-')) { t.emit('-'); t.advanceTransition(ScriptDataEscapeStartDash); } else { t.transition(ScriptData); } } },
void read(Tokeniser t, CharacterReader r) { if (r.matchConsume("--")) { t.createCommentPending(); t.transition(CommentStart); } else if (r.matchConsumeIgnoreCase("DOCTYPE")) { t.transition(Doctype); } else if (r.matchConsume("[CDATA[")) { // todo: should actually check current namepspace, and only non-html allows cdata. until namespace // is implemented properly, keep handling as cdata //} else if (!t.currentNodeInHtmlNS() && r.matchConsume("[CDATA[")) { t.createTempBuffer(); t.transition(CdataSection); } else { t.error(this); t.advanceTransition(BogusComment); // advance so this character gets in bogus comment data's rewind } } },
private static void readCharRef(Tokeniser t, TokeniserState advance) { int[] c = t.consumeCharacterReference(null, false); if (c == null) t.emit('&'); else t.emit(c); t.transition(advance); }
private static void handleRawtext(Token.StartTag startTag, HtmlTreeBuilder tb) { tb.tokeniser.transition(TokeniserState.Rawtext); tb.markInsertionMode(); tb.transition(Text); tb.insert(startTag); }
void read(Tokeniser t, CharacterReader r) { if (r.matches('/')) { t.emit('/'); t.createTempBuffer(); t.advanceTransition(ScriptDataDoubleEscapeEnd); } else { t.transition(ScriptDataDoubleEscaped); } } },
void read(Tokeniser t, CharacterReader r) { if (r.matches('/')) { t.createTempBuffer(); t.advanceTransition(RawtextEndTagOpen); } else { t.emit('<'); t.transition(Rawtext); } } },
void read(Tokeniser t, CharacterReader r) { if (r.matchesLetter()) { t.createTagPending(false); t.tagPending.appendTagName(r.current()); t.dataBuffer.append(r.current()); t.advanceTransition(ScriptDataEscapedEndTagName); } else { t.emit("</"); t.transition(ScriptDataEscaped); } } },
void read(Tokeniser t, CharacterReader r) { if (r.matchesLetter()) { t.createTagPending(false); t.tagPending.appendTagName(r.current()); t.dataBuffer.append(r.current()); t.advanceTransition(RCDATAEndTagName); } else { t.emit("</"); t.transition(Rcdata); } } },
void read(Tokeniser t, CharacterReader r) { if (r.isEmpty()) { t.eofError(this); t.emit("</"); t.transition(Data); } else if (r.matchesLetter()) { t.createTagPending(false); t.transition(TagName); } else if (r.matches('>')) { t.error(this); t.advanceTransition(Data); } else { t.error(this); t.advanceTransition(BogusComment); } } },
void read(Tokeniser t, CharacterReader r) { if (r.matches('/')) { t.createTempBuffer(); t.advanceTransition(RCDATAEndTagOpen); } else if (r.matchesLetter() && t.appropriateEndTagName() != null && !r.containsIgnoreCase("</" + t.appropriateEndTagName())) { // diverge from spec: got a start tag, but there's no appropriate end tag (</title>), so rather than // consuming to EOF; break out here t.tagPending = t.createTagPending(false).name(t.appropriateEndTagName()); t.emitTagPending(); r.unconsume(); // undo "<" t.transition(Data); } else { t.emit("<"); t.transition(Rcdata); } } },
void read(Tokeniser t, CharacterReader r) { if (r.matchesLetter()) { t.createTempBuffer(); t.dataBuffer.append(r.current()); t.emit("<" + r.current()); t.advanceTransition(ScriptDataDoubleEscapeStart); } else if (r.matches('/')) { t.createTempBuffer(); t.advanceTransition(ScriptDataEscapedEndTagOpen); } else { t.emit('<'); t.transition(ScriptDataEscaped); } } },
Element insert(Token.StartTag startTag) { // handle empty unknown tags // when the spec expects an empty tag, will directly hit insertEmpty, so won't generate this fake end tag. if (startTag.isSelfClosing()) { Element el = insertEmpty(startTag); stack.add(el); tokeniser.transition(TokeniserState.Data); // handles <script />, otherwise needs breakout steps from script data tokeniser.emit(emptyEnd.reset().name(el.tagName())); // ensure we get out of whatever state we are in. emitted for yielded processing return el; } Element el = new Element(Tag.valueOf(startTag.name(), settings), baseUri, settings.normalizeAttributes(startTag.attributes)); insert(el); return el; }