protected HTMLConfiguration newConfiguration() { HTMLConfiguration config = new HTMLConfiguration(); // Maintain original case for elements and attributes config.setProperty("http://cyberneko.org/html/properties/names/elems", "match"); config.setProperty("http://cyberneko.org/html/properties/names/attrs", "no-change"); // Get notified of entity and character references config.setFeature("http://apache.org/xml/features/scanner/notify-char-refs", true); config.setFeature("http://cyberneko.org/html/features/scanner/notify-builtin-refs", true); config.setFeature("http://xml.org/sax/features/namespaces", true); return config; }
protected HTMLConfiguration newConfiguration() { HTMLConfiguration config = new HTMLConfiguration(); // Maintain original case for elements and attributes config.setProperty("http://cyberneko.org/html/properties/names/elems", "match"); config.setProperty("http://cyberneko.org/html/properties/names/attrs", "no-change"); // Get notified of entity and character references config.setFeature("http://apache.org/xml/features/scanner/notify-char-refs", true); config.setFeature("http://cyberneko.org/html/features/scanner/notify-builtin-refs", true); config.setFeature("http://xml.org/sax/features/namespaces", true); return config; }
protected HTMLConfiguration newConfiguration() { HTMLConfiguration config = new HTMLConfiguration(); // Maintain original case for elements and attributes config.setProperty("http://cyberneko.org/html/properties/names/elems", "match"); config.setProperty("http://cyberneko.org/html/properties/names/attrs", "no-change"); // Get notified of entity and character references config.setFeature("http://apache.org/xml/features/scanner/notify-char-refs", true); config.setFeature("http://cyberneko.org/html/features/scanner/notify-builtin-refs", true); config.setFeature("http://xml.org/sax/features/namespaces", true); return config; }
/**
 * Returns a parser suitable for parsing HTML documents.
 *
 * <p>A Xerces {@code DOMParser} is created on top of a NekoHTML
 * configuration whose element-name property is {@code "match"} and whose
 * attribute-name property is {@code "no-change"}, so that the case of tag
 * and attribute names is preserved. No other feature or property is changed
 * here; in particular the namespaces feature is left untouched.
 * This method is used by {@link #parseHTML}.
 *
 * @return instance of <code>org.apache.xerces.parsers.DOMParser</code>
 *         with Neko configuration
 * @throws NestedApplicationException wrapping any exception raised while
 *         the configuration or the parser is being set up
 */
public static DOMParser getHTMLParser() {
    try {
        final HTMLConfiguration nekoConfig = new HTMLConfiguration();
        nekoConfig.setProperty(
                "http://cyberneko.org/html/properties/names/elems", "match");
        nekoConfig.setProperty(
                "http://cyberneko.org/html/properties/names/attrs", "no-change");
        return new DOMParser(nekoConfig);
    } catch (Exception exc) {
        // Any setup failure is rethrown with the original exception as cause.
        throw new NestedApplicationException(exc);
    }
}
public TemplateParserConfiguration(HTMLScanner scanner, String templateDefaultCharset, boolean balanceTag) { AdditionalHandlerFilter starter = new AdditionalHandlerFilter(); addComponent(starter); setProperty(TemplateScanner.HTML_NAMES_ELEMS, "match"); setProperty(TemplateScanner.HTML_NAMES_ATTRS, "no-change"); /* テンプレート上にエンコーディング指定がなければUTF-8と見なす */ setProperty(TemplateScanner.HTML_DEFAULT_ENCODING, templateDefaultCharset); setProperty(TemplateScanner.FILTERS, new XMLDocumentFilter[] { starter }); /* 元のテンプレート内容を忠実に再現させるオプション。 * ただし、</html>の後ろは無視される。false(デフォルト)の場合は、 * </body>と</html>の後につづくものをnekoがむりやり前に持ってくる */ setFeature(IGNORE_OUTSIDE_CONTENT, true); /* <html>や<body>が無い場合もそのままにするオプション。 * これが無いと勝手に付与されてしまう。 */ setFeature(DOCUMENT_FRAGMENT, true); /* HTMLの省略可能な閉じタグなどを自動的に付与するオプション。 * これをfalseにするべきではないが、HTML5の場合にはaタグがblock要素になっているが * NekoHTMLはinlineとして見てしまうため意図しない動きをするため、HTMLのバランスを * 作成者側で保証することとしてfalseにする。 */ setFeature(BALANCE_TAGS, balanceTag); fDocumentScanner = scanner; fDocumentScanner.reset(this); }
/**
 * Returns a parser suitable for parsing HTML documents.
 *
 * <p>A Xerces {@code DOMParser} is created on top of a NekoHTML
 * configuration whose element-name property is {@code "match"} and whose
 * attribute-name property is {@code "no-change"}, so that the case of tag
 * and attribute names is preserved. No other feature or property is changed
 * here; in particular the namespaces feature is left untouched.
 * This method is used by {@link #parseHTML}.
 *
 * @return instance of <code>org.apache.xerces.parsers.DOMParser</code>
 *         with Neko configuration
 * @throws NestedApplicationException wrapping any exception raised while
 *         the configuration or the parser is being set up
 */
public static DOMParser getHTMLParser() {
    try {
        final HTMLConfiguration nekoConfig = new HTMLConfiguration();
        nekoConfig.setProperty(
                "http://cyberneko.org/html/properties/names/elems", "match");
        nekoConfig.setProperty(
                "http://cyberneko.org/html/properties/names/attrs", "no-change");
        return new DOMParser(nekoConfig);
    } catch (Exception exc) {
        // Any setup failure is rethrown with the original exception as cause.
        throw new NestedApplicationException(exc);
    }
}
public Object createResource() { try { final org.cyberneko.html.HTMLConfiguration config = new org.cyberneko.html.HTMLConfiguration(); config.setFeature("http://xml.org/sax/features/namespaces", false); config.setFeature("http://cyberneko.org/html/features/override-doctype", true); config.setFeature("http://cyberneko.org/html/features/scanner/cdata-sections", true); // Avoids the inclusion of <HTML><BODY>, etc. around template fragments. Tag balancing will only // be performed inside the fragments' root nodes. config.setFeature("http://cyberneko.org/html/features/balance-tags/document-fragment", true); config.setProperty("http://cyberneko.org/html/properties/doctype/pubid", ""); config.setProperty("http://cyberneko.org/html/properties/doctype/sysid", ""); config.setProperty("http://cyberneko.org/html/properties/names/elems", "match"); config.setProperty("http://cyberneko.org/html/properties/names/attrs", "no-change"); return new org.apache.xerces.parsers.DOMParser(config); } catch(final Exception e) { throw new ConfigurationException( "Error while creating nekoHTML-based parser for " + "LEGACYHTML5 template modes.", e); } }
/**
 * Creates a new NekoHTML configuration.
 *
 * <p>The returned configuration has namespaces and tag balancing enabled,
 * the ignore-outside-content and report-errors features disabled, and both
 * element and attribute names set to {@code "lower"}.
 *
 * @return a newly created, configured {@code HTMLConfiguration}
 */
public static HTMLConfiguration getNewHtmlConfiguration() {
    final String nekoFeatures = "http://cyberneko.org/html/features/";
    final String nekoNames = "http://cyberneko.org/html/properties/names/";

    final HTMLConfiguration htmlConfig = new HTMLConfiguration();

    htmlConfig.setFeature("http://xml.org/sax/features/namespaces", true);
    htmlConfig.setFeature(nekoFeatures + "balance-tags/ignore-outside-content", false);
    htmlConfig.setFeature(nekoFeatures + "balance-tags", true);
    htmlConfig.setFeature(nekoFeatures + "report-errors", false);

    htmlConfig.setProperty(nekoNames + "elems", "lower");
    htmlConfig.setProperty(nekoNames + "attrs", "lower");

    return htmlConfig;
}
configuration.setProperty( FILTERS, new XMLDocumentFilter[] { javaScriptFilter } ); if (HTMLParserFactory.isPreserveTagCase()) { configuration.setProperty( TAG_NAME_CASE, "match" ); configuration.setProperty( ATTRIBUTE_NAME_CASE, "no-change" ); } else { configuration.setProperty( TAG_NAME_CASE, "lower" ); configuration.setProperty( ATTRIBUTE_NAME_CASE, "lower" ); configuration.setProperty(TAG_NAME_CASE, "upper"); configuration.setProperty(ATTRIBUTE_NAME_CASE, "upper"); configuration.setProperty(TAG_NAME_CASE, "lower"); configuration.setProperty(ATTRIBUTE_NAME_CASE, "lower");
configuration.setProperty( FILTERS, new XMLDocumentFilter[] { javaScriptFilter } ); if (HTMLParserFactory.isPreserveTagCase()) { configuration.setProperty( TAG_NAME_CASE, "match" ); configuration.setProperty( ATTRIBUTE_NAME_CASE, "no-change" ); } else { configuration.setProperty( TAG_NAME_CASE, "lower" ); configuration.setProperty( ATTRIBUTE_NAME_CASE, "lower" ); configuration.setProperty(TAG_NAME_CASE, "upper"); configuration.setProperty(ATTRIBUTE_NAME_CASE, "upper"); configuration.setProperty(TAG_NAME_CASE, "lower"); configuration.setProperty(ATTRIBUTE_NAME_CASE, "lower");
configuration.setProperty( FILTERS, new XMLDocumentFilter[] { javaScriptFilter } ); if (HTMLParserFactory.isPreserveTagCase()) { configuration.setProperty( TAG_NAME_CASE, "match" ); configuration.setProperty( ATTRIBUTE_NAME_CASE, "no-change" ); } else { configuration.setProperty( TAG_NAME_CASE, "lower" ); configuration.setProperty( ATTRIBUTE_NAME_CASE, "lower" ); configuration.setProperty(TAG_NAME_CASE, "upper"); configuration.setProperty(ATTRIBUTE_NAME_CASE, "upper"); configuration.setProperty(TAG_NAME_CASE, "lower"); configuration.setProperty(ATTRIBUTE_NAME_CASE, "lower");
/** Adds a component. */ protected void addComponent(HTMLComponent component) { // add component to list fHTMLComponents.addElement(component); // add recognized features and set default states String[] features = component.getRecognizedFeatures(); addRecognizedFeatures(features); int featureCount = features != null ? features.length : 0; for (int i = 0; i < featureCount; i++) { Boolean state = component.getFeatureDefault(features[i]); if (state != null) { setFeature(features[i], state.booleanValue()); } } // add recognized properties and set default values String[] properties = component.getRecognizedProperties(); addRecognizedProperties(properties); int propertyCount = properties != null ? properties.length : 0; for (int i = 0; i < propertyCount; i++) { Object value = component.getPropertyDefault(properties[i]); if (value != null) { setProperty(properties[i], value); } } } // addComponent(HTMLComponent)
/** Adds a component. */ protected void addComponent(HTMLComponent component) { // add component to list fHTMLComponents.addElement(component); // add recognized features and set default states String[] features = component.getRecognizedFeatures(); addRecognizedFeatures(features); int featureCount = features != null ? features.length : 0; for (int i = 0; i < featureCount; i++) { Boolean state = component.getFeatureDefault(features[i]); if (state != null) { setFeature(features[i], state.booleanValue()); } } // add recognized properties and set default values String[] properties = component.getRecognizedProperties(); addRecognizedProperties(properties); int propertyCount = properties != null ? properties.length : 0; for (int i = 0; i < propertyCount; i++) { Object value = component.getPropertyDefault(properties[i]); if (value != null) { setProperty(properties[i], value); } } } // addComponent(HTMLComponent)
}; addRecognizedProperties(recognizedProperties); setProperty(NAMES_ELEMS, "upper"); setProperty(NAMES_ATTRS, "lower"); setProperty(ERROR_REPORTER, fErrorReporter); Object symbolTable = ObjectFactory.createObject("org.apache.xerces.util.SymbolTable", "org.apache.xerces.util.SymbolTable"); setProperty(SYMBOL_TABLE, symbolTable);
}; addRecognizedProperties(recognizedProperties); setProperty(NAMES_ELEMS, "upper"); setProperty(NAMES_ATTRS, "lower"); setProperty(ERROR_REPORTER, fErrorReporter); Object symbolTable = ObjectFactory.createObject("org.apache.xerces.util.SymbolTable", "org.apache.xerces.util.SymbolTable"); setProperty(SYMBOL_TABLE, symbolTable);
@Override protected DocumentFragment parseFragmentImpl(String source) throws GadgetException { DocumentHandler handler; HTMLConfiguration config = newConfiguration(); // http://cyberneko.org/html/features/balance-tags/document-fragment // deprecated http://cyberneko.org/html/features/document-fragment config.setFeature("http://cyberneko.org/html/features/balance-tags/document-fragment", true); config.setProperty("http://cyberneko.org/html/properties/balance-tags/fragment-context-stack", new QName[]{new QName(null, "HTML", "HTML", null), new QName(null, "BODY", "BODY", null)}); try { handler = parseHtmlImpl(source, config, new NekoPatchTagBalancer()); } catch (IOException ioe) { return null; } return handler.getFragment(); }
@Override protected DocumentFragment parseFragmentImpl(String source) throws GadgetException { DocumentHandler handler; HTMLConfiguration config = newConfiguration(); // http://cyberneko.org/html/features/balance-tags/document-fragment // deprecated http://cyberneko.org/html/features/document-fragment config.setFeature("http://cyberneko.org/html/features/balance-tags/document-fragment", true); config.setProperty("http://cyberneko.org/html/properties/balance-tags/fragment-context-stack", new QName[]{new QName(null, "HTML", "HTML", null), new QName(null, "BODY", "BODY", null)}); try { handler = parseHtmlImpl(source, config, new NekoPatchTagBalancer()); } catch (IOException ioe) { return null; } return handler.getFragment(); }
@Override protected DocumentFragment parseFragmentImpl(String source) throws GadgetException { DocumentHandler handler; HTMLConfiguration config = newConfiguration(); // http://cyberneko.org/html/features/balance-tags/document-fragment // deprecated http://cyberneko.org/html/features/document-fragment config.setFeature("http://cyberneko.org/html/features/balance-tags/document-fragment", true); config.setProperty("http://cyberneko.org/html/properties/balance-tags/fragment-context-stack", new QName[]{new QName(null, "HTML", "HTML", null), new QName(null, "BODY", "BODY", null)}); try { handler = parseHtmlImpl(source, config, new NekoPatchTagBalancer()); } catch (IOException ioe) { return null; } return handler.getFragment(); }