protected void parseDocTypeText (Token token) { token.setType (Type.TEXT); pos --; while (pos < source.length () && getCharValidator ().isNameChar (source.charAt (pos))) pos ++; String s = source.substring (token.getStartOffset (), pos); if (s.length () == 0) throw new XMLParseException ("Expected some text"+lookAheadForErrorMessage ("but found", token.getStartOffset (), 20), token); // TODO How about "<!DOCTYPE SYSTEM ..."? if ("SYSTEM".equals (s)) token.setType (Type.DOCTYPE_SYSTEM); else if ("PUBLIC".equals (s)) token.setType (Type.DOCTYPE_PUBLIC); else if ("NDATA".equals (s)) token.setType (Type.DOCTYPE_NDATA); }
/**
 * Dispatch on the character at the current position: {@code '-'} starts a
 * comment, {@code '['} starts a CDATA section and {@code 'D'} starts a
 * DOCTYPE declaration. The character is consumed before the matching
 * parse method is called.
 *
 * @param token the token being built; passed on to the specific parse method
 * @throws XMLParseException if the input ends at the current position or the
 *         character is none of the three expected ones
 */
protected void parseExcalamation (Token token)
{
    // Treat end of input like any other unexpected character so that the caller
    // gets an XMLParseException instead of an index error from charAt().
    char c = pos < source.length () ? source.charAt (pos) : '\0';

    switch (c)
    {
    case '-':
        pos ++;
        parseComment (token);
        break;
    case '[':
        pos ++;
        parseCData (token);
        break;
    case 'D':
        pos ++;
        parseDocType (token);
        break;
    default:
        // The error is reported two characters before the current position.
        // NOTE(review): presumably that is where the "<!" starts - confirm.
        throw new XMLParseException ("Expected '<!--' or '<![CDATA['", source, pos-2);
    }
}
/**
 * Skip one optional whitespace token inside a DOCTYPE declaration.
 *
 * <p>If {@code startToken} is DTD whitespace, it is converted to a node,
 * added to {@code docType}, and the next token is fetched from the tokenizer.
 *
 * @param tokenizer source of the following token
 * @param startToken the current token; this might be whitespace
 * @param docType receives the whitespace node, if there is one
 * @return {@code startToken} if it is not whitespace, otherwise the token after it
 * @throws XMLParseException if {@code startToken} is {@code null} or the input
 *         ends after the whitespace
 */
protected Token skipOptionalWhitespace (XMLTokenizer tokenizer, Token startToken, DocType docType)
{
    if (startToken == null)
    {
        throw new XMLParseException ("Unexpected EOF after '<!DOCTYPE'", tokenizer.getSource (), tokenizer.getSource ().length ());
    }

    if (startToken.getType () != Type.DTD_WHITESPACE)
    {
        return startToken;
    }

    docType.add (toNode (startToken));

    Token following = tokenizer.next ();
    if (following == null)
    {
        throw new XMLParseException ("Unexpected EOF after '<!DOCTYPE'", startToken);
    }
    return following;
}
/**
 * Hand the text between {@code start} and {@code end} to the entity resolver
 * for validation. Does nothing when no entity resolver is installed.
 *
 * @param start offset of the first character of the entity in the source
 * @param end offset just past the last character of the entity
 * @throws XMLParseException if the resolver rejects the entity with an
 *         {@link IllegalArgumentException}; the original exception is kept as
 *         the cause and the error is located at {@code start}
 */
protected void verifyEntity (int start, int end)
{
    if (entityResolver != null)
    {
        String entity = source.substring (start, end);
        try
        {
            entityResolver.validateEntity (entity);
        }
        catch (IllegalArgumentException e)
        {
            XMLParseException failure = new XMLParseException (e.getMessage (), e);
            throw failure.setSource (source, start);
        }
    }
}
throw new XMLParseException ("Unexpected token "+token+" while parsing attributes of element "+parent.getName (), token); //@COBEX if (!Character.isWhitespace (token.getSource ().charAt (token.getStartOffset ()))) throw new XMLParseException ("Expected whitespace between attributes of element a but found "+token, token);
/**
 * Return the text that this token covers in its source, i.e. the characters
 * from the start offset (inclusive) to the end offset (exclusive).
 *
 * @return the text or <code>null</code> if there is no source
 */
public String getText ()
{
    if (getSource () == null)
    {
        return null;
    }
    return getSource ().substring (getStartOffset (), getEndOffset ());
}
/**
 * Verify that the source contains {@code expected} at {@code startPos} and
 * move the current position to just past it.
 *
 * @param expected the text which must be present in the source
 * @param startPos offset in the source at which {@code expected} must start
 * @param errorMessage message of the exception thrown on a mismatch
 * @throws XMLParseException if the source is too short or contains different
 *         text; the error is located at {@code startPos}
 */
protected void nextChars (String expected, int startPos, String errorMessage)
{
    // Offset just past the expected text; the current position ends up here on success.
    int endPos = startPos + expected.length ();

    if (endPos > source.length ())
    {
        throw new XMLParseException (errorMessage, source, startPos);
    }

    String actual = source.substring (startPos, endPos);
    if (!expected.equals (actual))
    {
        throw new XMLParseException (errorMessage, source, startPos);
    }

    pos = endPos;
}
protected void parseDocTypeSubElement (XMLTokenizer tokenizer, Token startToken, DocType docType) { Token token = startToken; token = expect (tokenizer, token, Type.DTD_WHITESPACE, "Expected whitespace after '<!ELEMENT'"); token = expect (tokenizer, token, Type.TEXT, "Expected element name"); String name = token.getText (); token = expect (tokenizer, token, Type.DTD_WHITESPACE, "Expected whitespace after element name"); Token beforeContent = token; while ((token = tokenizer.next ()) != null) { //System.out.println ("parseDocTypeSubElement "+token); if (token.getType() == Type.DOCTYPE_END) break; // TODO Check EMPTY, ANY, #PCDATA, (|), ?, *, + } if (token == null) throw new XMLParseException ("Unexpected EOF while parsing element content", tokenizer.getSource (), tokenizer.getOffset ()); String content = tokenizer.getSource ().substring (beforeContent.getEndOffset (), token.getStartOffset ()); startToken.setEndOffset (token.getEndOffset ()); DocTypeElement element = new DocTypeElement (startToken, name, content); docType.add (element); }
/** * This moves the line and column information by the text found in the source. */ protected void moveToOffset (XMLSource source, int offset) { offset = Math.min (source.length (), offset); for (int i=0; i<offset; i++) { char c = source.charAt (i); //System.out.println (line+":"+column+" "+c+" ("+((int)c)+")"); if (c == '\r' || c == '\n') { line ++; column = 1; if (c == '\r' && i+1 < source.length () && source.charAt (i+1) == '\n') i ++; lineStartOffset = i + 1; } else if (c == '\t') column += (8 - (column % 8)) + 1; else column ++; } }
/**
 * Build a short, escaped excerpt of the source for use in error messages.
 *
 * <p>At most {@code len} characters starting at {@code pos} are taken from
 * the source. If the source continues after the excerpt, {@code "..."} is
 * appended before escaping.
 *
 * @param conditionalPrefix text to put in front of the excerpt (surrounded by
 *        single spaces), or {@code null} to return the excerpt alone
 * @param pos offset in the source at which the excerpt starts
 * @param len maximum number of source characters in the excerpt
 * @return the escaped excerpt, or the empty string if {@code pos} is at or
 *         beyond the end of the source
 */
protected String lookAheadForErrorMessage (String conditionalPrefix, int pos, int len)
{
    if (pos >= source.length ())
    {
        return "";
    }

    int available = source.length () - pos;
    int count = Math.min (len, available);

    String excerpt = source.substring (pos, pos + count);
    if (count != available)
    {
        excerpt += "...";
    }

    String escaped = TextUtils.escapeJavaString (excerpt);
    return conditionalPrefix == null
        ? escaped
        : " " + conditionalPrefix + " " + escaped;
}
/**
 * Consume the character at the current position, which must be {@code expected}.
 *
 * @param expected the character which must come next
 * @throws XMLParseException if the input has ended or the next character is
 *         a different one; the position is not changed in this case
 */
protected void expect (char expected)
{
    boolean matches = pos < source.length () && source.charAt (pos) == expected;
    if (!matches)
    {
        String found = lookAheadForErrorMessage ("but found", pos, 20);
        throw new XMLParseException ("Expected '"+expected+"'"+found, source, pos);
    }

    pos ++;
}
/**
 * Read an XML name: one name start character followed by any number of name
 * characters. On success, the current position is just past the name.
 *
 * @param objectName description of what is being named; only used in the error message
 * @throws XMLParseException if there is no name start character at the
 *         current position
 */
protected void parseName (String objectName)
{
    final int startPos = pos;

    boolean hasValidStart = pos < source.length ()
        && charValidator.isNameStartChar (source.charAt (pos));
    if (!hasValidStart)
    {
        String found = lookAheadForErrorMessage ("but found", startPos, 20);
        throw new XMLParseException ("Expected valid XML name for "+objectName+found, source, startPos);
    }

    // Consume the start character, then the rest of the name
    pos ++;
    while (pos < source.length () && charValidator.isNameChar (source.charAt (pos)))
    {
        pos ++;
    }
}
/**
 * Verify the character at {@code offset} in {@code source}.
 *
 * <p>If the character is a high surrogate, it is combined with the following
 * low surrogate and the resulting code point is checked. All other
 * characters are checked on their own.
 *
 * @param source the text which contains the character
 * @param offset position of the character in {@code source}
 * @return an error message if a high surrogate is the last character of the
 *         source or is not followed by a low surrogate; otherwise the result
 *         of the code point check (defined elsewhere - presumably
 *         {@code null} for a valid character; confirm)
 */
public String isValid (XMLSource source, int offset)
{
    char c = source.charAt (offset);
    if (!Character.isHighSurrogate (c))
    {
        return isValid (c);
    }

    int next = offset + 1;
    if (next >= source.length ())
    {
        return "Unexpected end of input";
    }

    char c2 = source.charAt (next);
    if (!Character.isLowSurrogate (c2))
    {
        return "Character after first in surrogate pair is not between 0xDC00 and 0xDFFF: "+Integer.toHexString (c2);
    }

    return isValid (Character.toCodePoint (c, c2));
}
/**
 * Parse an entity reference up to and including the terminating {@code ';'}.
 *
 * <p>An optional {@code '#'} at the current position is skipped, then name
 * characters are consumed until a {@code ';'} is found. Finally, the text
 * from the token's start offset through the {@code ';'} is passed to
 * {@code verifyEntity}.
 *
 * @param token the token being built; its type is set to {@code ENTITY}
 * @throws XMLParseException if a character other than a name character
 *         appears before the {@code ';'}, if the {@code ';'} is missing, or
 *         if the entity fails verification
 */
protected void parseEntity (Token token)
{
    token.setType (Type.ENTITY);

    if (pos < source.length () && source.charAt (pos) == '#')
    {
        pos ++;
    }

    for (; pos < source.length (); pos ++)
    {
        char c = source.charAt (pos);
        if (c == ';')
        {
            break;
        }
        if (!charValidator.isNameChar (c))
        {
            throw new XMLParseException ("Illegal character in entity: ["+c+"] ("+Integer.toHexString (c)+")", source, pos);
        }
    }

    // Fails if the input ended before a ';' was found
    expect (';');

    verifyEntity (token.getStartOffset (), pos);
}