/**
 * Check whether a character may be used as the first character of an XML name.
 *
 * <p>Accepted are letters (as decided by {@code isLetter}), the colon and
 * the underscore.
 *
 * @param c the character to test
 * @return {@code true} if {@code c} is a letter, {@code ':'} or {@code '_'}
 */
public boolean isNameStartChar (char c)
{
    if (isLetter (c)) {
        return true;
    }
    return c == ':' || c == '_';
}
/**
 * Check whether a character may appear inside an XML name.
 *
 * <p>Accepted are letters, digits, the punctuation characters
 * {@code '.'}, {@code '-'}, {@code '_'} and {@code ':'}, combining
 * characters and extenders.
 *
 * @param c the character to test
 * @return {@code true} if {@code c} is allowed inside an XML name
 */
public boolean isNameChar (char c)
{
    if (isLetter (c) || isDigit (c)) {
        return true;
    }

    switch (c) {
        case '.':
        case '-':
        case '_':
        case ':':
            return true;
        default:
            break;
    }

    return isCombiningChar (c) || isExtender (c);
}
/**
 * Check whether a character counts as a letter.
 *
 * @param c the character to test
 * @return {@code true} if {@code c} is a base character or an ideographic character
 */
public boolean isLetter (char c)
{
    if (isBaseChar (c)) {
        return true;
    }
    return isIdeographic (c);
}
/**
 * Verify the character found at {@code offset} in {@code source}.
 *
 * <p>If the character is a high surrogate, the following character is read
 * as well; when it is a low surrogate, both are combined into a single code
 * point which is then validated. Any other character is validated on its own.
 *
 * @param source the input to read from
 * @param offset index of the character to verify
 * @return an error message if the input ends after a high surrogate or the
 *         high surrogate is not followed by a low surrogate; otherwise the
 *         result of the single-character / code-point {@code isValid} overload
 *         (presumably {@code null} for a valid character -- confirm against
 *         those overloads)
 */
public String isValid (XMLSource source, int offset)
{
    final char first = source.charAt (offset);

    // The common case: a character which is not the start of a surrogate pair.
    if (!Character.isHighSurrogate (first)) {
        return isValid (first);
    }

    final int next = offset + 1;
    if (next >= source.length ()) {
        return "Unexpected end of input";
    }

    final char second = source.charAt (next);
    if (!Character.isLowSurrogate (second)) {
        return "Character after first in surrogate pair is not between 0xDC00 and 0xDFFF: "+Integer.toHexString (second);
    }

    return isValid (Character.toCodePoint (first, second));
}
if (!v.isNameStartChar (entity.charAt (1))) throw new IllegalArgumentException ("Entity name doesn't begin with a valid character: '"+entity+"'"); for (int i=2; i<N; i++) if (!v.isNameChar (entity.charAt (i)))
/**
 * Move the current position forward until it either reaches the end of the
 * input or points at a character which is not whitespace.
 */
protected void skipWhiteSpace ()
{
    for (; pos < source.length (); pos ++) {
        if (!charValidator.isWhitespace (source.charAt (pos))) {
            return;
        }
    }
}
protected void parseDocTypeText (Token token) { token.setType (Type.TEXT); pos --; while (pos < source.length () && getCharValidator ().isNameChar (source.charAt (pos))) pos ++; String s = source.substring (token.getStartOffset (), pos); if (s.length () == 0) throw new XMLParseException ("Expected some text"+lookAheadForErrorMessage ("but found", token.getStartOffset (), 20), token); // TODO How about "<!DOCTYPE SYSTEM ..."? if ("SYSTEM".equals (s)) token.setType (Type.DOCTYPE_SYSTEM); else if ("PUBLIC".equals (s)) token.setType (Type.DOCTYPE_PUBLIC); else if ("NDATA".equals (s)) token.setType (Type.DOCTYPE_NDATA); }
/**
 * Check whether a string is a valid XML name.
 *
 * <p>A name is valid if it is neither {@code null} nor empty, its first
 * character is a valid name start character and every following character
 * is a valid name character. Checking only the first character would accept
 * strings such as {@code "a b"} or {@code "a<b"}.
 *
 * @param tokenizer the tokenizer which produced the name; not used by this implementation
 * @param name the name to check, may be {@code null}
 * @return {@code true} if {@code name} is a valid XML name
 */
protected boolean isValidName (XMLTokenizer tokenizer, String name)
{
    if (name == null || name.length () == 0) {
        return false;
    }

    if (!charValidator.isNameStartChar (name.charAt (0))) {
        return false;
    }

    // The remaining characters must be valid as well, not just the first one.
    for (int i = 1; i < name.length (); i ++) {
        if (!charValidator.isNameChar (name.charAt (i))) {
            return false;
        }
    }

    return true;
}
String msg = charValidator.isValid (value); if (msg == null) return value;
/**
 * Read an XML name starting at the current position.
 *
 * <p>On success, the position is left on the first character after the name.
 *
 * @param objectName description of what is being parsed, used in the error message
 * @throws XMLParseException if the input ends here or the current character
 *         is not a valid name start character
 */
protected void parseName (String objectName)
{
    final int nameStart = pos;

    final boolean atNameStart = pos < source.length ()
        && charValidator.isNameStartChar (source.charAt (pos));
    if (!atNameStart) {
        // Nothing was consumed, so there is no name at this position.
        throw new XMLParseException ("Expected valid XML name for "+objectName+lookAheadForErrorMessage ("but found", nameStart, 20), source, nameStart);
    }

    // Consume the start character, then every following name character.
    pos ++;
    for (; pos < source.length (); pos ++) {
        if (!charValidator.isNameChar (source.charAt (pos))) {
            break;
        }
    }
}
/**
 * Read the name of an element.
 *
 * <p>The resulting token will contain the {@code '<'} plus any whitespace
 * between it and the name plus the name itself but no whitespace after the
 * name.
 *
 * @param token the token to classify as the beginning of an element
 * @throws XMLParseException if the name is missing, the input ends after
 *         the name, or the name is followed by anything but whitespace,
 *         {@code '/'} or {@code '>'}
 */
protected void parseBeginElement (Token token)
{
    token.setType (Type.BEGIN_ELEMENT);
    inStartElement = true;

    skipWhiteSpace ();

    final int beforeName = pos;
    parseName ("start tag");
    final boolean nothingRead = (pos == beforeName);
    if (nothingRead) {
        throw new XMLParseException ("Missing element name", token);
    }

    if (pos >= source.length ()) {
        throw new XMLParseException ("Missing '>' of start tag", source, pos);
    }

    // Only whitespace or the end of the tag may follow the element name.
    final char following = source.charAt (pos);
    final boolean terminatesName = charValidator.isWhitespace (following)
        || following == '/'
        || following == '>';
    if (!terminatesName) {
        throw new XMLParseException ("Expected whitespace, '>' or '/>' after element name", source, pos);
    }
}
/**
 * Read an entity reference up to and including the closing {@code ';'}.
 *
 * <p>An optional {@code '#'} at the current position is skipped. Every
 * character up to the {@code ';'} must be a valid name character. Afterwards
 * the {@code ';'} is consumed via {@code expect} and the entity is passed
 * to {@code verifyEntity}.
 *
 * @param token the token to classify as an entity; its start offset marks the beginning of the entity
 * @throws XMLParseException if a character before the {@code ';'} is not a valid name character
 */
protected void parseEntity (Token token)
{
    token.setType (Type.ENTITY);

    // Skip an optional '#'.
    if (pos < source.length () && source.charAt (pos) == '#') {
        pos ++;
    }

    for (; pos < source.length (); pos ++) {
        final char current = source.charAt (pos);
        if (current == ';') {
            break;
        }
        if (!charValidator.isNameChar (current)) {
            throw new XMLParseException ("Illegal character in entity: ["+current+"] ("+Integer.toHexString (current)+")", source, pos);
        }
    }

    expect (';');
    verifyEntity (token.getStartOffset (), pos);
}
String msg = charValidator.isValid (source, pos); if (msg != null) throw new XMLParseException ("Illegal character found in processing instruction. "+msg, source, pos);
String msg = charValidator.isValid (source, pos); if (msg != null) throw new XMLParseException ("Illegal character found in text. "+msg, source, pos);
String msg = charValidator.isValid (source, pos); if (msg != null) throw new XMLParseException ("Illegal character found in CDATA. "+msg, source, pos);
String msg = getCharValidator ().isValid (source, pos); if (msg != null) throw new XMLParseException ("Illegal character found in quoted text. "+msg, source, pos);
String msg = charValidator.isValid (source, pos); if (msg != null) throw new XMLParseException ("Illegal character found in comment. "+msg, source, pos);
insideEntity = false; } else { String msg = charValidator.isValid(source, pos); if (msg != null) throw new XMLParseException("Illegal character found in attribute value. " + msg, source, pos);
String msg = charValidator.isValid (source, pos); if (msg != null) throw new XMLParseException ("Illegal character found in attribute value. "+msg, source, pos);
insideEntity = false; } else { String msg = charValidator.isValid(source, pos); if (msg != null) throw new XMLParseException("Illegal character found in attribute value. " + msg, source, pos);