/**
 * Called when the traversal enters a node; adds that node's contribution to {@code accum}.
 *
 * <p>Text nodes are handed to {@code appendNormalisedText}. For an element that is
 * block-level or is a {@code br}, a single space is appended, but only when the
 * accumulator already has content and does not end in whitespace.
 *
 * @param node  the node being entered
 * @param depth depth of the node in the tree (not used here)
 */
public void head(Node node, int depth) {
    if (node instanceof TextNode) {
        appendNormalisedText(accum, (TextNode) node);
        return;
    }
    if (!(node instanceof Element)) {
        return;
    }

    Element element = (Element) node;
    if (accum.length() == 0) {
        // Nothing collected yet, so there is nothing to separate from.
        return;
    }
    boolean breaksLine = element.isBlock() || element.tag.getName().equals("br");
    if (breaksLine && !TextNode.lastCharIsWhitespace(accum)) {
        accum.append(' ');
    }
}
public void tail(Node node, int depth) { // make sure there is a space between block tags and immediately following text nodes <div>One</div>Two should be "One Two". if (node instanceof Element) { Element element = (Element) node; if (element.isBlock() && (node.nextSibling() instanceof TextNode) && !TextNode.lastCharIsWhitespace(accum)) accum.append(' '); } } }, this);
/**
 * Fills in the main properties of an element's JSON representation: its tag name
 * and its display type ({@code "block"} or {@code "inline"}).
 *
 * <p>{@code a}, {@code img}, {@code video} and {@code audio} tags are then passed to
 * their dedicated handlers for tag-specific attributes (per the original note,
 * {@code href} for links and {@code src} for images).
 *
 * @param element    the element being converted
 * @param tag        the element's tag name
 * @param eleJsonObj the JSON object that receives the properties
 */
private void processMain(Element element, String tag, JSONObject eleJsonObj) {
    eleJsonObj.put("tag", tag);

    String displayType = element.isBlock() ? "block" : "inline";
    eleJsonObj.put("type", displayType);

    if (tag.equals("a")) {
        processTagA(eleJsonObj, element);
        return;
    }
    if (tag.equals("img")) {
        processTagImg(eleJsonObj, element);
        return;
    }

    boolean isMedia = tag.equals("video") || tag.equals("audio");
    if (isMedia) {
        processTagVideoOrAudio(eleJsonObj, tag, element);
    }
}
/**
 * Writes a node to {@code out}, either verbatim or as opening tag, processed
 * children, closing tag.
 *
 * <p>The wrapped form is used only for {@link Element} nodes. When {@code isBlock}
 * is {@code null} the element's own {@code isBlock()} decides; otherwise the
 * supplied value decides. Line breaks around the tags are requested via
 * {@code lineIf} only when {@code isBlock} was supplied explicitly.
 *
 * @param out     destination for the generated text
 * @param node    node to write
 * @param isBlock explicit block/inline override, or {@code null} to ask the element
 * @return always {@code true}
 */
private boolean processWrapped(final FormattingAppendable out, final Node node, Boolean isBlock) {
    final boolean explicit = isBlock != null;
    final boolean wrap = node instanceof Element
            && (explicit ? isBlock.booleanValue() : ((Element) node).isBlock());

    if (!wrap) {
        out.append(node.toString());
        next();
        return true;
    }

    final String html = node.toString();

    // Opening tag: everything up to and including the first '>'.
    final int openEnd = html.indexOf(">");
    out.lineIf(explicit).append(html.substring(0, openEnd + 1)).lineIf(explicit);

    next();
    processHtmlTree(out, node, false);

    // Closing tag: everything from the last '<' onwards.
    final int closeStart = html.lastIndexOf("<");
    out.lineIf(explicit).append(html.substring(closeStart)).lineIf(explicit);

    return true;
}
/**
 * Reports whether this object's underlying element is block-level.
 *
 * @return whatever {@code originElement.isBlock()} returns
 */
public boolean isBlock() {
    return originElement.isBlock();
}
/**
 * Tells whether a node should be treated as a block: it must be an
 * {@link Element} that is either block-level or a {@code br} tag.
 *
 * @param n node to inspect; may be {@code null}
 * @return {@code true} for block elements and {@code br}; {@code false} for
 *         {@code null}, non-element nodes and all other elements
 */
private boolean isBlock(Node n) {
    // instanceof is false for null, so no separate null check is needed.
    if (!(n instanceof Element)) {
        return false;
    }
    Element el = (Element) n;
    return el.isBlock() || el.tagName().equals("br");
}
public void tail(Node node, int depth) { // make sure there is a space between block tags and immediately // following text nodes <div>One</div>Two should be "One Two". if (node instanceof Element) { Element element = (Element) node; if (element == excluded) { excluded = null; } if (element.isBlock() && (node.nextSibling() instanceof TextNode) && !lastCharIsWhitespace(accum)) accum.append(' '); } } }, node);
/**
 * Called when the traversal enters a node; adds that node's contribution to {@code accum}
 * unless the node lies inside an excluded element.
 *
 * <p>Text nodes are appended through {@code appendNormalisedText} only while no
 * excluded element is active ({@code excluded == null}). Entering an element whose
 * tag name is in {@code excludedTags} marks it as the active excluded element.
 * Independently of exclusion, a block element or a {@code br} appends a single
 * space when the accumulator is non-empty and does not already end in whitespace.
 *
 * @param node  the node being entered
 * @param depth depth of the node in the tree (not used here)
 */
public void head(Node node, int depth) {
    if (excluded == null && node instanceof TextNode) {
        TextNode textNode = (TextNode) node;
        appendNormalisedText(accum, textNode);
    } else if (node instanceof Element) {
        Element element = (Element) node;

        // Remember only the OUTERMOST excluded element. Without the null check, an
        // excluded tag nested inside another excluded tag would replace the marker;
        // leaving the inner element would then clear it and the remaining text of
        // the outer excluded element would be appended.
        if (excluded == null && excludedTags.contains(element.tagName())) {
            excluded = element;
        }

        boolean breaksLine = accum.length() > 0
                && (element.isBlock() || element.tag().getName().equals("br"));
        if (breaksLine && !lastCharIsWhitespace(accum)) {
            accum.append(' ');
        }
    }
}
/**
 * Searches the descendants of an element for a block-level element.
 *
 * <p>The search is depth-first and stops at the first block-level descendant found.
 *
 * @param node element whose descendants are inspected
 * @return {@code true} if any descendant is block-level (which, per the original
 *         author's note, would be invalid HTML for the callers of this check);
 *         {@code false} otherwise
 */
private boolean checkInnerBlock(Element node) {
    for (final Element child : node.children()) {
        if (child.isBlock() || checkInnerBlock(child)) {
            return true;
        }
    }
    return false;
}
/**
 * Recursively appends the text of an element's subtree to {@code accum}, skipping
 * every child for which {@code unlikely(child)} returns {@code true}.
 *
 * <p>Before descending into a child element a single space is appended when either
 * the element is block-level, the accumulator is non-empty and does not end in
 * whitespace, or (failing that) the element is a {@code br}.
 *
 * @param e     element whose children are walked
 * @param accum buffer receiving the collected text
 */
void appendTextSkipHidden(Element e, StringBuilder accum) {
    for (Node child : e.childNodes()) {
        if (unlikely(child)) {
            continue;
        }

        if (child instanceof TextNode) {
            accum.append(((TextNode) child).text());
        } else if (child instanceof Element) {
            Element nested = (Element) child;

            boolean blockNeedsGap = accum.length() > 0
                    && nested.isBlock()
                    && !lastCharIsWhitespace(accum);
            if (blockNeedsGap || nested.tagName().equals("br")) {
                accum.append(" ");
            }

            appendTextSkipHidden(nested, accum);
        }
    }
}
/**
 * Collects a run of consecutive sibling nodes, starting at {@code next} and ending
 * just before the first block-level element (or at the last sibling), and moves
 * them into a newly created {@code P_TAG} element.
 *
 * <p>Each collected node is removed from its current parent and a clone of it is
 * appended to the new element.
 *
 * @param next first node of the run; may be {@code null}
 * @return the new element holding the run, or {@code null} when {@code next} is
 *         {@code null} or the run is empty (i.e. {@code next} is itself a block element)
 */
private Element inlineTextToParagraph(Node next) {
    if (next == null) {
        return null;
    }

    List<Node> inlineRun = new ArrayList<Node>();
    for (Node cursor = next; cursor != null; cursor = cursor.nextSibling()) {
        boolean isBlockElement = cursor instanceof Element && ((Element) cursor).isBlock();
        if (isBlockElement) {
            break;
        }
        inlineRun.add(cursor);
    }

    if (inlineRun.isEmpty()) {
        return null;
    }

    Element lastParagraph = new Element(P_TAG, "");
    for (Node child : inlineRun) {
        child.remove();
        lastParagraph.appendChild(child.clone());
    }
    return lastParagraph;
}
&& ((Element) next).isBlock()) { break;
/**
 * Called when the traversal leaves a node; records an {@code XMLElement}
 * annotation for nodes whose runtime class is exactly {@link Element}.
 *
 * <p>The annotation is created via {@code builder.add(beginMap.get(node), ...)}
 * (presumably the position stored when the node was entered; confirm against the
 * matching {@code head}), filled with tag, id, CSS selector, attributes and class,
 * and registered in {@code annotationMap} under the element's CSS selector. The
 * class falls back to the {@code type} attribute when the element has no class
 * name. Unless whitespace is preserved, a newline is added after block elements
 * and after tags listed in {@code blockElements}.
 *
 * @param node  the node being left
 * @param depth depth of the node in the tree (not used here)
 */
public void tail(Node node, int depth) {
    // Exact class match: subclasses of Element are deliberately not handled here.
    if (!node.getClass().equals(Element.class)) {
        return;
    }

    Element elm = (Element) node;
    XMLElement anno = builder.add(beginMap.get(node), XMLElement.class);
    anno.setTag(elm.tagName());
    anno.setId(elm.id());
    anno.setSelector(elm.cssSelector());
    anno.setAttributes(elm.attributes().html());
    anno.setCls(elm.className().isEmpty() ? elm.attr("type") : elm.className());
    annotationMap.put(elm.cssSelector(), anno);

    boolean endsLine = !this.preserveWhitespace
            && (elm.isBlock() || ArrayUtils.contains(blockElements, elm.tagName()));
    if (endsLine) {
        builder.add("\n");
    }
}
/**
 * Called when the traversal leaves a node; records an annotation for elements
 * and for XML declarations.
 *
 * <p>For an {@link Element}, an {@code XMLElement} annotation is created via
 * {@code builder.add(beginMap.get(node), ...)} (presumably the position stored
 * when the node was entered; confirm against the matching {@code head}), filled
 * with tag, id, CSS selector, attributes and class, and registered in
 * {@code annotationMap} under the element's CSS selector. The class falls back to
 * the {@code type} attribute when the element has no class name. Unless whitespace
 * is preserved, a newline is added after block elements and after tags listed in
 * {@code blockElements}.
 *
 * <p>For an {@code XmlDeclaration}, an {@code XmlDeclarationAnnotation} carrying
 * the declaration's outer HTML is created instead.
 *
 * @param node  the node being left
 * @param depth depth of the node in the tree (not used here)
 */
@Override
public void tail(Node node, int depth) {
    if (node instanceof Element) {
        Element elm = (Element) node;
        XMLElement anno = builder.add(beginMap.get(node), XMLElement.class);
        anno.setTag(elm.tagName());
        anno.setId(elm.id());
        anno.setSelector(elm.cssSelector());
        anno.setAttributes(elm.attributes().html());
        anno.setCls(elm.className().isEmpty() ? elm.attr("type") : elm.className());
        annotationMap.put(elm.cssSelector(), anno);

        boolean endsLine = !this.preserveWhitespace
                && (elm.isBlock() || ArrayUtils.contains(blockElements, elm.tagName()));
        if (endsLine) {
            builder.add("\n");
        }
        return;
    }

    if (node instanceof XmlDeclaration) {
        XmlDeclaration xmlDecl = (XmlDeclaration) node;
        XmlDeclarationAnnotation declAnno =
                builder.add(beginMap.get(node), XmlDeclarationAnnotation.class);
        declAnno.setOuterHtml(xmlDecl.outerHtml());
    }
}
/** * Handle an ignored HTMLElement. * The default method here is to either write the HTMLElement as a block if it is a block element, * or write it directly if it is not. * * @param node Node to handle * @param converter Parent converter for this object. */ public void handleIgnoredHTMLElement(Element node, DocumentConverter converter) { if(node.isBlock()) { converter.output.writeBlock(node.toString()); } else { // Note: because this is an inline element, we want to make sure it stays that way! // this means turning off prettyPrinting, so that JSoup doesn't add unecessary spacing around // the child nodes. Document doc = node.ownerDocument(); boolean oldPrettyPrint = doc.outputSettings().prettyPrint(); doc.outputSettings().prettyPrint(false); converter.output.write(node.toString()); doc.outputSettings().prettyPrint(oldPrettyPrint); } }