XMLTokenizer entityTokenizer = new XMLTokenizer (source); entityTokenizer.setEntityResolver (parentTokenizer.getEntityResolver ()); entityTokenizer.setTreatEntitiesAsText (parentTokenizer.isTreatEntitiesAsText ()); entityTokenizer.setCharValidator (parentTokenizer.getCharValidator ());
return null; Token token = createToken (); skipWhiteSpace (); parseAttribute (token); parseBeginSomething (token); parseEntity (token); parseText (token);
/**
 * Parse the remainder of an end tag and mark {@code token} as
 * {@link Type#END_ELEMENT}.
 *
 * <p>The resulting token will contain the '</' and '>' plus the
 * name plus any whitespace between those three.
 *
 * <p>Steps, in order: skip whitespace, read the element name, skip
 * whitespace again and require the closing '>'.
 *
 * @param token the token being built; only its type is set here
 */
protected void parseEndElement (Token token)
{
    // Label handed to parseName() for its error message
    final String objectName = "end tag";

    token.setType (Type.END_ELEMENT);

    skipWhiteSpace ();
    parseName (objectName);
    skipWhiteSpace ();
    expect ('>');
}
/**
 * Create a new {@link XMLTokenizer} for {@code source} which inherits the
 * {@code treatEntitiesAsText} setting of this object.
 *
 * @param source the XML source the new tokenizer will read
 * @return the freshly created tokenizer
 */
protected XMLTokenizer createTokenizer (XMLSource source)
{
    final XMLTokenizer result = new XMLTokenizer (source);
    result.setTreatEntitiesAsText (treatEntitiesAsText);
    return result;
}
/** * Fetch the next token and make sure it's {@code expected}. If not, create an * {@link XMLParseException} using the {@code errorMessage} */ protected Token expect (XMLTokenizer tokenizer, Token startToken, Type expected, String errorMessage) { Token token = tokenizer.next (); //System.out.println (token); if (token == null || token.getType () != expected) { if (token == null) token = startToken; throw new XMLParseException (errorMessage + tokenizer.lookAheadForErrorMessage ("but found", token.getStartOffset (), 20) + " (" + token + ")", token); } return token; }
tokenizer.setCharValidator (charValidator); tokenizer.setEntityResolver (entityResolver); Token token; while ((token = tokenizer.next ()) != null) XMLTokenizer dtdTokenizer = createDTDTokenizer (tokenizer.getSource (), token.getStartOffset ()); tokenizer.setOffset (dtdTokenizer.getOffset ());
parseName ("attribute"); skipWhiteSpace (); expect ('='); skipWhiteSpace (); +lookAheadForErrorMessage (null, token.getStartOffset (), i), token); verifyEntity (errorPos, pos+1); insideEntity = false; throw new XMLParseException ("Illegal character found in attribute value. "+msg, source, pos); skipChar (c); pos --; throw new XMLParseException ("Missing ';' after '&': "+lookAheadForErrorMessage (null, errorPos, 20), source, errorPos);
protected void parseDocTypeSubElement (XMLTokenizer tokenizer, Token startToken, DocType docType) { Token token = startToken; token = expect (tokenizer, token, Type.DTD_WHITESPACE, "Expected whitespace after '<!ELEMENT'"); token = expect (tokenizer, token, Type.TEXT, "Expected element name"); String name = token.getText (); token = expect (tokenizer, token, Type.DTD_WHITESPACE, "Expected whitespace after element name"); Token beforeContent = token; while ((token = tokenizer.next ()) != null) { //System.out.println ("parseDocTypeSubElement "+token); if (token.getType() == Type.DOCTYPE_END) break; // TODO Check EMPTY, ANY, #PCDATA, (|), ?, *, + } if (token == null) throw new XMLParseException ("Unexpected EOF while parsing element content", tokenizer.getSource (), tokenizer.getOffset ()); String content = tokenizer.getSource ().substring (beforeContent.getEndOffset (), token.getStartOffset ()); startToken.setEndOffset (token.getEndOffset ()); DocTypeElement element = new DocTypeElement (startToken, name, content); docType.add (element); }
token = skipWhiteSpaceAndComments (tokenizer, tokenizer.next (), notation); token = tokenizer.next (); if (token != null && token.getType () != Type.DOCTYPE_END throw new XMLParseException ("Unexpected EOF while parsing notation declaration", tokenizer.getSource (), tokenizer.getOffset ()); if (token.getType () != Type.DOCTYPE_END) throw new XMLParseException ("Expected '>' after notation declaration"+tokenizer.lookAheadForErrorMessage ("but found", token.getStartOffset (), 20), tokenizer.getSource (), tokenizer.getOffset ());
nextChars ("<![CDATA[", pos - 3, "Expected '<![CDATA['"); pos ++; if (pos+1 >= source.length ()) throw new XMLParseException ("Expected ']]>'"+lookAheadForErrorMessage ("but found", errorPos, 20), source, errorPos); throw new XMLParseException ("Illegal character found in CDATA. "+msg, source, pos); skipChar (c);
verifyEntity (errorPos, pos + 1); throw new XMLParseException ("Illegal character found in text. "+msg, source, pos); skipChar (c); throw new XMLParseException ("Missing ';' after '&': "+lookAheadForErrorMessage (null, errorPos, 20), source, errorPos);
/** * Fetch the next token and make sure it's one of {@code expected}. If not, create an * {@link XMLParseException} using the {@code errorMessage} */ protected Token expect (XMLTokenizer tokenizer, Token startToken, Type[] expected, String errorMessage) { Token token = tokenizer.next (); //System.out.println (token); boolean valid = token != null; if (valid) { valid = false; for (Type t: expected) { if (token.getType () == t) { valid = true; break; } } } if (!valid) { if (token == null) token = startToken; throw new XMLParseException (errorMessage + ": " + token, token); } return token; }
/**
 * Skip {@code startToken} if it is DTD whitespace.
 *
 * <p>A whitespace token is converted to a node and added to {@code docType}
 * before the next token is fetched. Any other token is returned untouched.
 *
 * @param tokenizer  source of the following token
 * @param startToken the current token; may be whitespace
 * @param docType    receives the whitespace node, if any
 * @return {@code startToken} when it is not whitespace, otherwise the token
 *         following it; never {@code null}
 * @throws XMLParseException if {@code startToken} is {@code null} or if the
 *         input ends right after the whitespace
 */
protected Token skipOptionalWhitespace (XMLTokenizer tokenizer, Token startToken, DocType docType)
{
    if (startToken == null) {
        throw new XMLParseException ("Unexpected EOF after '<!DOCTYPE'", tokenizer.getSource (), tokenizer.getSource ().length ());
    }

    if (startToken.getType () != Type.DTD_WHITESPACE) {
        return startToken;
    }

    docType.add (toNode (startToken));

    final Token following = tokenizer.next ();
    if (following == null) {
        throw new XMLParseException ("Unexpected EOF after '<!DOCTYPE'", startToken);
    }
    return following;
}
/**
 * Read an entity reference up to and including the terminating ';' and mark
 * {@code token} as {@link Type#ENTITY}.
 *
 * <p>An optional '#' directly at the current position is skipped. After that,
 * every character up to the ';' must be accepted by
 * {@code charValidator.isNameChar()}. Finally, the entity is handed to
 * {@code verifyEntity()} together with the start offset of the token.
 *
 * @param token the token being built
 * @throws XMLParseException on a character which is not a name character or
 *         when the ';' is missing
 */
protected void parseEntity (Token token)
{
    token.setType (Type.ENTITY);

    // Optional marker of a numeric reference
    if (pos < source.length () && source.charAt (pos) == '#') {
        pos ++;
    }

    for (; pos < source.length (); pos ++) {
        final char current = source.charAt (pos);
        if (current == ';') {
            break;
        }

        if (!charValidator.isNameChar (current)) {
            throw new XMLParseException ("Illegal character in entity: ["+current+"] ("+Integer.toHexString (current)+")", source, pos);
        }
    }

    expect (';');
    verifyEntity (token.getStartOffset (), pos);
}
/**
 * Require the character at the current position to be {@code expected} and
 * move past it.
 *
 * @param expected the character which must come next
 * @throws XMLParseException if the input is exhausted or a different
 *         character is found; the position is left unchanged in that case
 */
protected void expect (char expected)
{
    final boolean matches = pos < source.length () && source.charAt (pos) == expected;
    if (!matches) {
        final String found = lookAheadForErrorMessage ("but found", pos, 20);
        throw new XMLParseException ("Expected '"+expected+"'"+found, source, pos);
    }

    pos ++;
}
protected DocType parseDocType (XMLTokenizer tokenizer) Token startToken = tokenizer.next (); if (startToken == null) throw new XMLParseException ("Expected '<!DOCTYPE'", tokenizer.getSource (), tokenizer.getOffset ()); if (startToken.getType () != Type.DOCTYPE) throw new XMLParseException ("Expected '<!DOCTYPE' but found '"+startToken.getText ()+"'", startToken); docType.setName (token.getText ()); token = skipOptionalWhitespace (tokenizer, tokenizer.next (), docType); token = tokenizer.next (); if (token != null) throw new XMLParseException ("Expected no further tokens from the DTD tokenizer: "+token, token);
token = skipWhiteSpaceAndComments (tokenizer, tokenizer.next (), entity); throw new XMLParseException ("Unexpected EOF while parsing entity declaration", tokenizer.getSource (), tokenizer.getOffset ()); token = skipWhiteSpaceAndComments (tokenizer, tokenizer.next (), entity); if (token == null) throw new XMLParseException ("Unexpected EOF while parsing entity declaration", tokenizer.getSource (), tokenizer.getOffset ()); throw new XMLParseException ("Expected '>' after entity declaration"+tokenizer.lookAheadForErrorMessage ("but found", token.getStartOffset (), 20), tokenizer.getSource (), tokenizer.getOffset ());
/**
 * Consume a (possibly nested) group in an attribute list declaration.
 *
 * <p>{@code token} and every token read afterwards, up to and including the
 * {@link Type#DOCTYPE_END_GROUP} which closes the outermost group, are added
 * to {@code attList} as nodes. Nested {@link Type#DOCTYPE_BEGIN_GROUP} /
 * {@link Type#DOCTYPE_END_GROUP} pairs are tracked with a depth counter.
 *
 * @param tokenizer source of the tokens
 * @param token     the token which opened the group
 * @param attList   receives a node for each consumed token
 * @return the token which closes the group
 * @throws XMLParseException if the input ends before the group is closed
 */
protected Token parseAttListTypeGroup (XMLTokenizer tokenizer, Token token, DocTypeAttributeList attList)
{
    attList.addNode (toNode (token));

    final Token groupStart = token;
    int depth = 0;

    for (Token current = tokenizer.next (); current != null; current = tokenizer.next ()) {
        attList.addNode (toNode (current));

        final Type type = current.getType ();
        if (type == Type.DOCTYPE_BEGIN_GROUP) {
            depth ++;
        }
        else if (type == Type.DOCTYPE_END_GROUP) {
            if (depth == 0) {
                // This closes the group we started with
                return current;
            }
            depth --;
        }
    }

    throw new XMLParseException ("Expected end of group"+tokenizer.lookAheadForErrorMessage ("but found", groupStart.getStartOffset (), 20), groupStart);
}
/**
 * Skip any run of DTD whitespace and DOCTYPE comment tokens, starting with
 * {@code token}.
 *
 * <p>Each skipped token is converted to a node and added to {@code n}.
 *
 * @param tokenizer source of the following tokens
 * @param token     the current token; may be {@code null}
 * @param n         receives the skipped whitespace and comment nodes
 * @return the first token which is neither whitespace nor a comment, or
 *         {@code null} if the tokenizer ran out of tokens
 */
protected Token skipWhiteSpaceAndComments (XMLTokenizer tokenizer, Token token, DocTypeNode n)
{
    Token current = token;

    while (current != null) {
        final Type type = current.getType ();
        final boolean skippable = type == Type.DTD_WHITESPACE || type == Type.DOCTYPE_COMMENT;
        if (!skippable) {
            break;
        }

        n.addNode (toNode (current));
        current = tokenizer.next ();
    }

    return current;
}
/**
 * Read an XML name starting at the current position.
 *
 * <p>The first character must be accepted by
 * {@code charValidator.isNameStartChar()}; the position is then advanced
 * over all following characters accepted by
 * {@code charValidator.isNameChar()}.
 *
 * @param objectName what is being parsed; only used in the error message
 * @throws XMLParseException if there is no valid name at the current
 *         position; the position is left unchanged in that case
 */
protected void parseName (String objectName)
{
    final int begin = pos;

    final boolean startsName = pos < source.length () && charValidator.isNameStartChar (source.charAt (pos));
    if (!startsName) {
        throw new XMLParseException ("Expected valid XML name for "+objectName+lookAheadForErrorMessage ("but found", begin, 20), source, begin);
    }

    // Step over the start character, then over the rest of the name
    do {
        pos ++;
    } while (pos < source.length () && charValidator.isNameChar (source.charAt (pos)));
}