/** * Fetch the next token and make sure it's one of {@code expected}. If not, create an * {@link XMLParseException} using the {@code errorMessage} */ protected Token expect (XMLTokenizer tokenizer, Token startToken, Type[] expected, String errorMessage) { Token token = tokenizer.next (); //System.out.println (token); boolean valid = token != null; if (valid) { valid = false; for (Type t: expected) { if (token.getType () == t) { valid = true; break; } } } if (!valid) { if (token == null) token = startToken; throw new XMLParseException (errorMessage + ": " + token, token); } return token; }
/**
 * Consume a run of DTD whitespace and DOCTYPE comment tokens, starting with
 * {@code token}, and add each of them to {@code n} via {@code toNode}.
 *
 * @param tokenizer the source of the following tokens
 * @param token the first token to examine; may be {@code null}
 * @param n the node that receives the skipped tokens
 * @return the first token which is neither whitespace nor a comment, or
 *         {@code null} when the tokenizer ran out of tokens (or {@code token} was {@code null})
 */
protected Token skipWhiteSpaceAndComments (XMLTokenizer tokenizer, Token token, DocTypeNode n)
{
    Token current = token;

    while (current != null
        && (current.getType () == Type.DTD_WHITESPACE || current.getType () == Type.DOCTYPE_COMMENT))
    {
        n.addNode (toNode (current));
        current = tokenizer.next ();
    }

    return current;
}
/** * Fetch the next token and make sure it's {@code expected}. If not, create an * {@link XMLParseException} using the {@code errorMessage} */ protected Token expect (XMLTokenizer tokenizer, Token startToken, Type expected, String errorMessage) { Token token = tokenizer.next (); //System.out.println (token); if (token == null || token.getType () != expected) { if (token == null) token = startToken; throw new XMLParseException (errorMessage + tokenizer.lookAheadForErrorMessage ("but found", token.getStartOffset (), 20) + " (" + token + ")", token); } return token; }
/**
 * Parse the part of a DOCTYPE that follows the {@code SYSTEM} keyword:
 * whitespace, then a quoted system literal.
 *
 * <p>Marks {@code docType} as {@code DocTypeType.SYSTEM}, adds both tokens to it as nodes
 * and stores the literal without its first and last character (the quotes).
 *
 * @param tokenizer the source of tokens
 * @param startToken the token passed to {@code expect} for error reporting
 * @param docType the DOCTYPE being filled in
 * @return the result of {@code skipOptionalWhitespace} applied to the token after the literal
 * @throws XMLParseException if the expected whitespace or quoted text is missing
 */
protected Token parseSystemLiteral (XMLTokenizer tokenizer, Token startToken, DocType docType)
{
    docType.setDocTypeType (DocTypeType.SYSTEM);

    Token whitespace = expect (tokenizer, startToken, Type.DTD_WHITESPACE, "Expected whitespace after 'SYSTEM'");
    docType.add (toNode (whitespace));

    Token literal = expect (tokenizer, startToken, Type.DOCTYPE_QUOTED_TEXT, "Expected quoted system literal after 'SYSTEM'");
    docType.add (toNode (literal));

    // Drop the surrounding quote characters.
    String quoted = literal.getText ();
    docType.setSystemLiteral (quoted.substring (1, quoted.length () - 1));

    Token following = tokenizer.next ();
    return skipOptionalWhitespace (tokenizer, following, docType);
}
/**
 * Read tokens of a name-token group in an attribute list until the group is closed.
 *
 * <p>Every token read is added to {@code attList} via {@code toNode} before its type is
 * checked. Whitespace, text and alternative tokens are accepted; an end-group token
 * finishes the loop.
 *
 * @param tokenizer the source of tokens
 * @param token not read; the value passed in is ignored
 * @param attList the attribute list that receives the tokens
 * @return the end-group token, or {@code null} if the tokenizer ran out of tokens first
 * @throws XMLParseException if any other kind of token is encountered
 */
protected Token parseAttListNameTokens (XMLTokenizer tokenizer, Token token, DocTypeAttributeList attList)
{
    while (true)
    {
        Token current = tokenizer.next ();
        if (current == null)
        {
            // NOTE(review): end of input is not reported here; presumably the caller
            // deals with a null result -- confirm.
            return null;
        }

        attList.addNode (toNode (current));

        boolean partOfList = current.getType () == Type.DTD_WHITESPACE
            || current.getType () == Type.TEXT
            || current.getType () == Type.DOCTYPE_ALTERNATIVE;
        if (partOfList)
            continue;

        if (current.getType () == Type.DOCTYPE_END_GROUP)
            return current;

        throw new XMLParseException ("Expected whitespace, '|' or a name token", current);
    }
}
/**
 * Parse the part of a DOCTYPE that follows the {@code PUBLIC} keyword:
 * whitespace, a quoted public id, whitespace and a quoted system literal.
 *
 * <p>Marks {@code docType} as {@code DocTypeType.PUBLIC}, adds all four tokens to it as
 * nodes and stores both literals without their first and last character (the quotes).
 *
 * @param tokenizer the source of tokens
 * @param startToken the token passed to {@code expect} for error reporting
 * @param docType the DOCTYPE being filled in
 * @return the result of {@code skipOptionalWhitespace} applied to the token after the system literal
 * @throws XMLParseException if any of the four expected tokens is missing
 */
protected Token parsePublicLiteral (XMLTokenizer tokenizer, Token startToken, DocType docType)
{
    docType.setDocTypeType (DocTypeType.PUBLIC);

    Token firstGap = expect (tokenizer, startToken, Type.DTD_WHITESPACE, "Expected whitespace after 'PUBLIC'");
    docType.add (toNode (firstGap));

    Token publicId = expect (tokenizer, startToken, Type.DOCTYPE_QUOTED_TEXT, "Expected quoted public id after 'PUBLIC'");
    docType.add (toNode (publicId));

    // Drop the surrounding quote characters.
    String quotedPublicId = publicId.getText ();
    docType.setPublicLiteral (quotedPublicId.substring (1, quotedPublicId.length () - 1));

    Token secondGap = expect (tokenizer, startToken, Type.DTD_WHITESPACE, "Expected whitespace after public id "+docType.getPublicLiteral ());
    docType.add (toNode (secondGap));

    Token systemId = expect (tokenizer, startToken, Type.DOCTYPE_QUOTED_TEXT, "Expected quoted system literal after the public id of 'PUBLIC'");
    docType.add (toNode (systemId));

    String quotedSystemId = systemId.getText ();
    docType.setSystemLiteral (quotedSystemId.substring (1, quotedSystemId.length () - 1));

    Token following = tokenizer.next ();
    return skipOptionalWhitespace (tokenizer, following, docType);
}
Token token = tokenizer.next (); if (token == null) return null;
/**
 * Skip a single whitespace token, if there is one.
 *
 * <p>When {@code startToken} is DTD whitespace, it is added to {@code docType} as a node
 * and the next token is fetched from the tokenizer. Any other token is handed back untouched.
 *
 * @param tokenizer the source of the following token
 * @param startToken the current token; this might be whitespace
 * @param docType the DOCTYPE that receives the whitespace node
 * @return {@code startToken} if it is not whitespace, otherwise the token after it
 * @throws XMLParseException if {@code startToken} is {@code null} or if there is no
 *         token after the whitespace
 */
protected Token skipOptionalWhitespace (XMLTokenizer tokenizer, Token startToken, DocType docType)
{
    if (startToken == null)
    {
        throw new XMLParseException ("Unexpected EOF after '<!DOCTYPE'",
            tokenizer.getSource (), tokenizer.getSource ().length ());
    }

    if (startToken.getType () != Type.DTD_WHITESPACE)
        return startToken;

    docType.add (toNode (startToken));

    Token following = tokenizer.next ();
    if (following == null)
        throw new XMLParseException ("Unexpected EOF after '<!DOCTYPE'", startToken);

    return following;
}
/**
 * Read a parenthesized group in an attribute list, including any nested groups,
 * up to the end-group token that closes it.
 *
 * <p>{@code token} and every token read afterwards are added to {@code attList} via
 * {@code toNode}. A counter tracks nested begin-group tokens so that only the
 * end-group token at the outermost level ends the loop.
 *
 * @param tokenizer the source of tokens
 * @param token the token that opened the group
 * @param attList the attribute list that receives the tokens
 * @return the end-group token closing the outermost group
 * @throws XMLParseException if the tokenizer runs out of tokens before the group is closed
 */
protected Token parseAttListTypeGroup (XMLTokenizer tokenizer, Token token, DocTypeAttributeList attList)
{
    final Token groupStart = token;
    attList.addNode (toNode (groupStart));

    int depth = 0;
    for (Token current = tokenizer.next (); current != null; current = tokenizer.next ())
    {
        attList.addNode (toNode (current));

        if (current.getType () == Type.DOCTYPE_END_GROUP)
        {
            // An end-group at depth 0 closes the group we were asked to parse.
            if (depth == 0)
                return current;
            depth --;
        }
        else if (current.getType () == Type.DOCTYPE_BEGIN_GROUP)
        {
            depth ++;
        }
    }

    // Ran out of tokens while the group was still open.
    throw new XMLParseException ("Expected end of group"+tokenizer.lookAheadForErrorMessage ("but found", groupStart.getStartOffset (), 20), groupStart);
}
while (true) token = tokenizer.next (); if (token == null || token.getType() == Type.BEGIN_ELEMENT_END) break;
protected DocType parseDocType (XMLTokenizer tokenizer) Token startToken = tokenizer.next (); if (startToken == null) throw new XMLParseException ("Expected '<!DOCTYPE'", tokenizer.getSource (), tokenizer.getOffset ()); docType.setName (token.getText ()); token = skipOptionalWhitespace (tokenizer, tokenizer.next (), docType); token = tokenizer.next (); if (token != null) throw new XMLParseException ("Expected no further tokens from the DTD tokenizer: "+token, token);
protected void parseDocTypeSubElement (XMLTokenizer tokenizer, Token startToken, DocType docType) { Token token = startToken; token = expect (tokenizer, token, Type.DTD_WHITESPACE, "Expected whitespace after '<!ELEMENT'"); token = expect (tokenizer, token, Type.TEXT, "Expected element name"); String name = token.getText (); token = expect (tokenizer, token, Type.DTD_WHITESPACE, "Expected whitespace after element name"); Token beforeContent = token; while ((token = tokenizer.next ()) != null) { //System.out.println ("parseDocTypeSubElement "+token); if (token.getType() == Type.DOCTYPE_END) break; // TODO Check EMPTY, ANY, #PCDATA, (|), ?, *, + } if (token == null) throw new XMLParseException ("Unexpected EOF while parsing element content", tokenizer.getSource (), tokenizer.getOffset ()); String content = tokenizer.getSource ().substring (beforeContent.getEndOffset (), token.getStartOffset ()); startToken.setEndOffset (token.getEndOffset ()); DocTypeElement element = new DocTypeElement (startToken, name, content); docType.add (element); }
Token token; while ((token = tokenizer.next ()) != null)