/**
 * Appends the whole text of {@code node} to {@code accum} when the node is a
 * {@link TextNode}; any other node type is ignored.
 *
 * @param node  the node to inspect
 * @param depth not used by this method
 */
public void head(Node node, int depth) {
    if (!(node instanceof TextNode)) {
        return;
    }
    String wholeText = ((TextNode) node).getWholeText();
    accum.append(wholeText);
}
/**
 * Returns this node's whole text after it has been passed through
 * {@link StringUtil#normaliseWhitespace(String)}.
 *
 * @return the whitespace-normalised text of this node
 * @see TextNode#getWholeText()
 */
public String text() {
    final String raw = getWholeText();
    return StringUtil.normaliseWhitespace(raw);
}
/**
 * Appends the text of {@code textNode} to {@code accum}.
 * The text is appended verbatim when {@code preserveWhitespace} accepts the node's
 * parent or when the node is a {@link CDataNode}; otherwise it is appended via
 * {@link StringUtil#appendNormalisedWhitespace}.
 *
 * @param accum    destination buffer
 * @param textNode node whose whole text is appended
 */
private static void appendNormalisedText(StringBuilder accum, TextNode textNode) {
    final String wholeText = textNode.getWholeText();
    final boolean keepVerbatim = preserveWhitespace(textNode.parentNode) || textNode instanceof CDataNode;
    if (keepVerbatim) {
        accum.append(wholeText);
        return;
    }
    // the flag tells the normaliser whether accum already ends in whitespace
    StringUtil.appendNormalisedWhitespace(accum, wholeText, TextNode.lastCharIsWhitespace(accum));
}
public void head(org.jsoup.nodes.Node source, int depth) { namespacesStack.push(new HashMap<>(namespacesStack.peek())); // inherit from above on the stack if (source instanceof org.jsoup.nodes.Element) { org.jsoup.nodes.Element sourceEl = (org.jsoup.nodes.Element) source; String prefix = updateNamespaces(sourceEl); String namespace = namespacesStack.peek().get(prefix); Element el = doc.createElementNS(namespace, sourceEl.tagName()); copyAttributes(sourceEl, el); if (dest == null) { // sets up the root doc.appendChild(el); } else { dest.appendChild(el); } dest = el; // descend } else if (source instanceof org.jsoup.nodes.TextNode) { org.jsoup.nodes.TextNode sourceText = (org.jsoup.nodes.TextNode) source; Text text = doc.createTextNode(sourceText.getWholeText()); dest.appendChild(text); } else if (source instanceof org.jsoup.nodes.Comment) { org.jsoup.nodes.Comment sourceComment = (org.jsoup.nodes.Comment) source; Comment comment = doc.createComment(sourceComment.getData()); dest.appendChild(comment); } else if (source instanceof org.jsoup.nodes.DataNode) { org.jsoup.nodes.DataNode sourceData = (org.jsoup.nodes.DataNode) source; Text node = doc.createTextNode(sourceData.getWholeData()); dest.appendChild(node); } else { // unhandled } }
/**
 * Decides what to do with {@code node}.
 * <ul>
 *   <li>Once {@code signatureFound} is set, every node yields {@code REMOVE}.</li>
 *   <li>An element resets {@code lastElementCausedLineBreak}; an element whose tag equals
 *       {@code BLOCKQUOTE} yields {@code SKIP_ENTIRELY}.</li>
 *   <li>A text node that matches {@code DASH_SIGNATURE_HTML}, is seen while
 *       {@code lastElementCausedLineBreak} is set, and whose next sibling is an element
 *       with tag {@code BR} sets {@code signatureFound}, removes
 *       {@code brElementPrecedingDashes} if present, and yields {@code REMOVE}.</li>
 *   <li>Everything else yields {@code CONTINUE}.</li>
 * </ul>
 *
 * @param node  the node being examined
 * @param depth not used by this method
 * @return the decision for this node
 */
@NonNull
@Override
public HeadFilterDecision head(Node node, int depth) {
    if (signatureFound) {
        return HeadFilterDecision.REMOVE;
    }

    if (node instanceof Element) {
        lastElementCausedLineBreak = false;
        boolean isBlockquote = ((Element) node).tag().equals(BLOCKQUOTE);
        return isBlockquote ? HeadFilterDecision.SKIP_ENTIRELY : HeadFilterDecision.CONTINUE;
    }

    if (!(node instanceof TextNode) || !lastElementCausedLineBreak) {
        return HeadFilterDecision.CONTINUE;
    }

    String content = ((TextNode) node).getWholeText();
    if (!DASH_SIGNATURE_HTML.matcher(content).matches()) {
        return HeadFilterDecision.CONTINUE;
    }

    Node following = node.nextSibling();
    boolean followedByBr = following instanceof Element && ((Element) following).tag().equals(BR);
    if (!followedByBr) {
        return HeadFilterDecision.CONTINUE;
    }

    signatureFound = true;
    if (brElementPrecedingDashes != null) {
        brElementPrecedingDashes.remove();
        brElementPrecedingDashes = null;
    }
    return HeadFilterDecision.REMOVE;
}
/**
 * Processes the remaining data as content without leading (left-side) whitespace
 * and generates the nodes for it.
 * The HTML is parsed, and each child of its {@code body} is either handed to
 * {@code processMutilTextNode} (text nodes) or converted with
 * {@code convertNodeToJsonObject} (nodes accepted by {@code isNotNullSpan}).
 * The result is stored in {@code liNode} under the key {@code "nodes"}.
 *
 * @param lineHtml  HTML fragment to parse
 * @param liNode    JSON object that receives the {@code "nodes"} entry
 * @param jsonNodes array the generated nodes are added to
 */
private void processLiWithoutLeftWhiteSpace(String lineHtml, JSONObject liNode, JSONArray jsonNodes) {
    List<Node> children = Jsoup.parse(lineHtml).selectFirst("body").childNodes();
    if (children.isEmpty()) {
        liNode.put("nodes", Collections.emptyList());
        return;
    }

    for (Node child : children) {
        String tag = child.nodeName();
        if (isTextNode(tag)) {
            processMutilTextNode(jsonNodes, tag, ((TextNode) child).getWholeText());
            continue;
        }
        if (!isNotNullSpan(child)) {
            continue;
        }
        JSONObject converted = convertNodeToJsonObject(child, child.nodeName(), true);
        if (converted != null) {
            jsonNodes.add(converted);
        }
    }
    liNode.put("nodes", jsonNodes);
}
return processTextNode(tag, ((TextNode) node).getWholeText());
public void head(Node source, int depth) { if (source instanceof Element) { Element sourceEl = (Element) source; if (whitelist.isSafeTag(sourceEl.tagName())) { // safe, clone and copy safe attrs ElementMeta meta = createSafeElement(sourceEl); Element destChild = meta.el; destination.appendChild(destChild); numDiscarded += meta.numAttribsDiscarded; destination = destChild; } else if (source != root) { // not a safe tag, so don't add. don't count root against discarded. numDiscarded++; } } else if (source instanceof TextNode) { TextNode sourceText = (TextNode) source; TextNode destText = new TextNode(sourceText.getWholeText()); destination.appendChild(destText); } else if (source instanceof DataNode && whitelist.isSafeTag(source.parent().nodeName())) { DataNode sourceData = (DataNode) source; DataNode destData = new DataNode(sourceData.getWholeData()); destination.appendChild(destData); } else { // else, we don't care about comments, xml proc instructions, etc numDiscarded++; } }
/**
 * Copies {@code source} into the tree under {@code destination} when it is allowed.
 * Nothing is done while {@code elementToSkip} is set. A disallowed element other
 * than {@code root} is recorded in {@code elementToSkip}.
 *
 * @param source the node to copy or skip
 * @param depth  not used by this method
 */
public void head(Node source, int depth) {
    if (elementToSkip != null) {
        return;
    }

    if (source instanceof Element) {
        final Element original = (Element) source;
        if (!isSafeTag(original)) {
            if (source != root) {
                elementToSkip = original;
            }
            return;
        }
        final String tagName = original.tagName();
        final Attributes copiedAttributes = original.attributes().clone();
        final Element copy = new Element(Tag.valueOf(tagName), original.baseUri(), copiedAttributes);
        destination.appendChild(copy);
        destination = copy; // children of this element are attached to the copy
        return;
    }

    if (source instanceof TextNode) {
        final TextNode original = (TextNode) source;
        final TextNode copy = new TextNode(original.getWholeText(), source.baseUri());
        destination.appendChild(copy);
        return;
    }

    if (source instanceof DataNode && isSafeTag(source.parent())) {
        final DataNode original = (DataNode) source;
        final DataNode copy = new DataNode(original.getWholeData(), source.baseUri());
        destination.appendChild(copy);
    }
}
String nextText = ((TextNode) next).getWholeText(); if (!nextText.isEmpty() && Character.isWhitespace(nextText.charAt(0))) { addSpaceAfter = false;
/**
 * Calls {@code skip()} and appends the prepared text of {@code node} to {@code out}.
 * When {@code out} is pre-formatted the node's whole text is used (prepared with the
 * extra {@code true} argument); otherwise the result of {@code node.text()} is used.
 *
 * @param out  destination for the prepared text
 * @param node text node to emit
 * @return always {@code true}
 */
private boolean processText(FormattingAppendable out, TextNode node) {
    skip();
    if (!out.isPreFormatted()) {
        out.append(prepareText(node.text()));
        return true;
    }
    out.append(prepareText(node.getWholeText(), true));
    return true;
}
if (child instanceof TextNode) { if (textPrefix != null && textPrefix.length() > 0) out.append(textPrefix); String text = ((TextNode) child).getWholeText(); String preparedText = prepareText(text); out.append(preparedText);
/**
 * Collects the text produced for the nodes returned by {@code peek()} after
 * {@code pushState(node)}: text nodes are appended via {@code prepareText}, elements
 * are delegated to {@code processElement}, and anything else is skipped.
 *
 * @param node the node passed to {@code pushState}
 * @return the text accumulated in the local {@code FormattingAppendable}
 */
private String processTextNodes(Node node) {
    pushState(node);
    FormattingAppendable buffer = new FormattingAppendableImpl(0);

    for (Node current = peek(); current != null; current = peek()) {
        if (current instanceof TextNode) {
            String raw = ((TextNode) current).getWholeText();
            buffer.append(prepareText(raw));
            skip();
            continue;
        }
        if (current instanceof Element) {
            processElement(buffer, current);
            continue;
        }
        skip();
    }

    transferIdToParent();
    popState(null);
    return buffer.getText();
}
@Override protected boolean isApplicable(TextNode node) { if (HtmlUtils.hasAncestor(node, IGNORED_TAGS)) return false; return node.getWholeText().contains("@"); // fast scan here, do pattern match later } };
/**
 * Sends the text of {@code node} to {@code contentHandler} as a characters event.
 * The whole text is used when {@code isPreserveWhitespace()} is true, otherwise the
 * result of {@code node.text()}.
 *
 * @param stack          not used by this method
 * @param contentHandler receiver of the characters event
 * @throws SAXException if the handler's {@code characters} call throws it
 */
@Override
public void process(java.util.Stack<NodeHandler> stack, ContentHandler contentHandler) throws SAXException {
    final String text;
    if (isPreserveWhitespace()) {
        text = node.getWholeText();
    } else {
        text = node.text();
    }
    final char[] chars = text.toCharArray();
    contentHandler.characters(chars, 0, chars.length);
}
@Override protected boolean isApplicable(TextNode node) { if (HtmlUtils.hasAncestor(node, IGNORED_TAGS)) return false; String text = StringUtils.deleteWhitespace(node.getWholeText()).toLowerCase(); return text.contains(referenceType.toLowerCase()) && text.contains("#"); // fast scan here, do pattern match later } };
/**
 * Adds the text of {@code node} to {@code builder} when the node's class is exactly
 * {@link TextNode}; for every other node, records the builder's current position in
 * {@code beginMap}.
 *
 * @param node  the node to handle
 * @param depth not used by this method
 */
public void head(Node node, int depth) {
    // exact class comparison: subclasses of TextNode take the beginMap branch
    if (!node.getClass().equals(TextNode.class)) {
        beginMap.put(node, builder.getPosition());
        return;
    }
    TextNode textNode = (TextNode) node;
    if (this.preserveWhitespace) {
        builder.add(textNode.getWholeText());
        return;
    }
    builder.add(textNode.text());
}
/**
 * Removes {@code textNode} when its text is empty after whitespace normalisation and
 * trimming, and the elements on both sides share the same tag name, and that tag
 * name is contained in {@code CHILD_TAGS}.
 *
 * @param textNode    the text node sitting between the two elements
 * @param prevElement the element before the text node
 * @param nextElement the element after the text node
 */
private void normalizeTextBetweenNodes(TextNode textNode, Element prevElement, Element nextElement) {
    String collapsed = StringUtil.normaliseWhitespace(textNode.getWholeText()).trim();
    if (!collapsed.isEmpty()) {
        return;
    }
    String nextTag = nextElement.tagName();
    boolean sameTagOnBothSides = nextTag.equals(prevElement.tagName());
    if (sameTagOnBothSides && CHILD_TAGS.contains(nextTag)) {
        textNode.remove();
    }
}
/**
 * Appends the text of {@code textNode} to {@code accum}.
 * The text goes through {@link StringUtil#appendNormalisedWhitespace} unless
 * {@code preserveWhitespace} accepts the node's parent or the node is a
 * {@link CDataNode}, in which case it is appended unchanged.
 *
 * @param accum    destination buffer
 * @param textNode node whose whole text is appended
 */
private static void appendNormalisedText(StringBuilder accum, TextNode textNode) {
    final String content = textNode.getWholeText();
    if (!preserveWhitespace(textNode.parent()) && !(textNode instanceof CDataNode)) {
        // the flag tells the normaliser whether accum already ends in whitespace
        StringUtil.appendNormalisedWhitespace(accum, content, lastCharIsWhitespace(accum));
    } else {
        accum.append(content);
    }
}
/**
 * Adds the text of {@code node} to {@code builder} when the node is a
 * {@link TextNode} (whole text if {@code preserveWhitespace} is set, otherwise the
 * result of {@code text()}); for every other node, records the builder's current
 * position in {@code beginMap}.
 *
 * @param node  the node to handle
 * @param depth not used by this method
 */
@Override
public void head(Node node, int depth) {
    if (!(node instanceof TextNode)) {
        beginMap.put(node, builder.getPosition());
        return;
    }
    TextNode textNode = (TextNode) node;
    if (this.preserveWhitespace) {
        builder.add(textNode.getWholeText());
        return;
    }
    builder.add(textNode.text());
}