public void head(Node source, int depth) { if (source instanceof Element) { Element sourceEl = (Element) source; if (whitelist.isSafeTag(sourceEl.tagName())) { // safe, clone and copy safe attrs ElementMeta meta = createSafeElement(sourceEl); Element destChild = meta.el; destination.appendChild(destChild); numDiscarded += meta.numAttribsDiscarded; destination = destChild; } else if (source != root) { // not a safe tag, so don't add. don't count root against discarded. numDiscarded++; } } else if (source instanceof TextNode) { TextNode sourceText = (TextNode) source; TextNode destText = new TextNode(sourceText.getWholeText()); destination.appendChild(destText); } else if (source instanceof DataNode && whitelist.isSafeTag(source.parent().nodeName())) { DataNode sourceData = (DataNode) source; DataNode destData = new DataNode(sourceData.getWholeData()); destination.appendChild(destData); } else { // else, we don't care about comments, xml proc instructions, etc numDiscarded++; } }
/**
 * Extracts the JSON value that follows {@code follows=} inside an inline script of the given
 * HTML page and parses it into a JSON tree.
 *
 * <p>The JSON text is everything between the first {@code "follows="} and the first
 * {@code ";seajs.use"} that comes after it.
 *
 * @param content the HTML page source
 * @return the parsed JSON tree
 * @throws IOException if either marker is missing from the script text, or if the extracted
 *                     text cannot be read by the mapper
 */
private JsonNode parseHtmlToJsonNode(String content) throws IOException {
    final String startMarker = "follows=";
    final String endMarker = ";seajs.use";

    Document doc = Jsoup.parse(content);
    // NOTE(review): the script is picked by its fixed position (index 15) and its first data
    // node; this throws an unchecked exception if the page has fewer scripts - confirm that
    // the page layout is stable.
    String scriptText = doc.getElementsByTag("script")
            .get(15)
            .dataNodes()
            .get(0)
            .attr("data");

    // A missing start marker used to silently chop the first characters of the script,
    // and a missing end marker used to fail with StringIndexOutOfBoundsException.
    int markerStart = scriptText.indexOf(startMarker);
    if (markerStart < 0) {
        throw new IOException("Marker '" + startMarker + "' not found in script content");
    }
    int jsonStart = markerStart + startMarker.length();

    int jsonEnd = scriptText.indexOf(endMarker, jsonStart);
    if (jsonEnd < 0) {
        throw new IOException("Marker '" + endMarker + "' not found after '" + startMarker + "'");
    }

    return mapper.readTree(scriptText.substring(jsonStart, jsonEnd));
}
}
/**
 Build a DataNode from HTML-encoded data; entities in the input are unescaped first.
 @param encodedData the HTML-encoded data
 @param baseUri base URI (not used by this implementation)
 @return a new DataNode holding the unescaped data
 */
public static DataNode createFromEncoded(String encodedData, String baseUri) {
    return new DataNode(Entities.unescape(encodedData));
}
}
void outerHtmlHead(Appendable accum, int depth, Document.OutputSettings out) throws IOException { accum.append(getWholeData()); // data is not escaped in return from data nodes, so " in script, style is plain }
// @font-face rule that is put in front of every stylesheet that mentions a font family.
final String newFontInfo = "@font-face { \n"
        + "font-family: 'MyFont';\n"
        + "src: url('file:///android_asset/Custom-Font.otf')\n"
        + "}\n";

Elements styles = doc.select("style");
for (Element styleElement : styles) {
    for (DataNode cssNode : styleElement.dataNodes()) {
        String css = cssNode.getWholeData();
        if (!css.contains("font-family")) {
            continue; // stylesheet does not set a font family, leave it untouched
        }
        // point every single-quoted font-family declaration at 'MyFont'
        final String rewrittenCss = css.replaceAll("font-family:\\s*'[^']*'", "font-family: 'MyFont'");
        cssNode.setWholeData(newFontInfo + rewrittenCss);
    }
}
/**
 * Blanks out the content of every data node that is visited; other nodes are left alone.
 *
 * @param node  the node being entered
 * @param depth depth of the node (not used here)
 */
@Override
public void head(Node node, int depth) {
    if (!(node instanceof DataNode)) {
        return;
    }
    DataNode dataNode = (DataNode) node;
    dataNode.setWholeData("");
}

@Override
void insert(Token.Character characterToken) { Node node; // characters in script and style go in as datanodes, not text nodes final String tagName = currentElement().tagName(); final String data = characterToken.getData(); if (characterToken.isCData()) node = new CDataNode(data); else if (tagName.equals("script") || tagName.equals("style")) node = new DataNode(data); else node = new TextNode(data); currentElement().appendChild(node); // doesn't use insertNode, because we don't foster these; and will always have a stack. }
public void head(org.jsoup.nodes.Node source, int depth) { namespacesStack.push(new HashMap<>(namespacesStack.peek())); // inherit from above on the stack if (source instanceof org.jsoup.nodes.Element) { org.jsoup.nodes.Element sourceEl = (org.jsoup.nodes.Element) source; String prefix = updateNamespaces(sourceEl); String namespace = namespacesStack.peek().get(prefix); Element el = doc.createElementNS(namespace, sourceEl.tagName()); copyAttributes(sourceEl, el); if (dest == null) { // sets up the root doc.appendChild(el); } else { dest.appendChild(el); } dest = el; // descend } else if (source instanceof org.jsoup.nodes.TextNode) { org.jsoup.nodes.TextNode sourceText = (org.jsoup.nodes.TextNode) source; Text text = doc.createTextNode(sourceText.getWholeText()); dest.appendChild(text); } else if (source instanceof org.jsoup.nodes.Comment) { org.jsoup.nodes.Comment sourceComment = (org.jsoup.nodes.Comment) source; Comment comment = doc.createComment(sourceComment.getData()); dest.appendChild(comment); } else if (source instanceof org.jsoup.nodes.DataNode) { org.jsoup.nodes.DataNode sourceData = (org.jsoup.nodes.DataNode) source; Text node = doc.createTextNode(sourceData.getWholeData()); dest.appendChild(node); } else { // unhandled } }
/**
 * Copies a visited node into the destination tree, unless an element is currently being
 * skipped.
 *
 * <p>While {@code elementToSkip} is non-null the call returns immediately. Otherwise: a safe
 * element is copied with a clone of its attributes and becomes the new destination; an unsafe
 * element other than the root is recorded in {@code elementToSkip}; text nodes are always
 * copied; data nodes are copied only when their parent is safe. Other node types are ignored.
 *
 * @param source the node being entered
 * @param depth  depth of the node (not used here)
 */
public void head(Node source, int depth) {
    if (elementToSkip != null) {
        return;
    }

    if (source instanceof Element) {
        Element sourceElement = (Element) source;

        if (!isSafeTag(sourceElement)) {
            if (source != root) {
                elementToSkip = sourceElement;
            }
            return;
        }

        String tagName = sourceElement.tagName();
        Attributes copiedAttributes = sourceElement.attributes().clone();
        Element copy = new Element(Tag.valueOf(tagName), sourceElement.baseUri(), copiedAttributes);
        destination.appendChild(copy);
        destination = copy;
        return;
    }

    if (source instanceof TextNode) {
        String wholeText = ((TextNode) source).getWholeText();
        destination.appendChild(new TextNode(wholeText, source.baseUri()));
        return;
    }

    if (source instanceof DataNode && isSafeTag(source.parent())) {
        String wholeData = ((DataNode) source).getWholeData();
        destination.appendChild(new DataNode(wholeData, source.baseUri()));
    }
}
new DataNode(builder.toString(), mainScriptTag.baseUri())); fragmentNodes.add(mainScriptTag);
if (childNode instanceof DataNode) { DataNode data = (DataNode) childNode; sb.append(data.getWholeData()); } else if (childNode instanceof Comment) { Comment comment = (Comment) childNode;
sourceFiles.add(dn.getWholeData()); element.attr("src", sourceDocument); } else { DataNode dn = new DataNode(sourceDocument, element.baseUri()); element.appendChild(dn);
// Create a data node holding a small JavaScript snippet (the second argument is the node's
// base URI) and append it to `script`.
DataNode json = new DataNode("var x = { \"foo\" : \"bar\" }", "http://domain.tld/path"); script.appendChild(json);
// Treat the node as a DataNode and hand its whole data, together with the tag,
// to processDataNode; that call's result is returned as-is.
return processDataNode(tag, ((DataNode) node).getWholeData());
/**
 * Records the visited node on the {@code elements} stack and copies it into the destination
 * tree.
 *
 * <p>Every node is wrapped and pushed. A {@code body} element is pushed but not copied. Any
 * other element is copied via {@code createSafeElement} and becomes the new destination when
 * {@code shouldKeepChild} accepts it given its parent wrapper (the previous top of the stack,
 * or {@code null} when the stack was empty). Text and data nodes are always copied.
 *
 * @param source the node being entered
 * @param depth  depth of the node (not used here)
 */
public void head(Node source, int depth) {
    NodeWrapper wrapped = new NodeWrapper(source);
    NodeWrapper parentWrapper = null;
    if (!elements.isEmpty()) {
        parentWrapper = elements.peek();
    }
    elements.push(wrapped);

    if (source instanceof Element) {
        Element sourceEl = (Element) source;
        if (sourceEl.tagName().equals("body")) {
            return;
        }
        if (!shouldKeepChild(wrapped, parentWrapper)) {
            return;
        }
        Element copy = createSafeElement(sourceEl);
        destination.appendChild(copy);
        destination = copy;
        return;
    }

    if (source instanceof TextNode) {
        String wholeText = ((TextNode) source).getWholeText();
        destination.appendChild(new TextNode(wholeText, source.baseUri()));
        return;
    }

    if (source instanceof DataNode) {
        String wholeData = ((DataNode) source).getWholeData();
        destination.appendChild(new DataNode(wholeData, source.baseUri()));
    }
}
private static Element createInlineJavaScriptElement( String javaScriptContents) { // defer makes no sense without src: // https://developer.mozilla.org/en/docs/Web/HTML/Element/script Element wrapper = createJavaScriptElement(null, false); wrapper.appendChild( new DataNode(javaScriptContents, wrapper.baseUri())); return wrapper; }
// Fetch the page (timeout value 10000) and print the data of every script element,
// writing a separator line after each element.
Document doc = Jsoup
        .connect("http://stackoverflow.com/questions/16780517/java-obtain-text-within-script-tag-using-jsoup")
        .timeout(10000)
        .get();
Elements scriptElements = doc.getElementsByTag("script");
for (Element scriptElement : scriptElements) {
    for (DataNode scriptData : scriptElement.dataNodes()) {
        String wholeData = scriptData.getWholeData();
        System.out.println(wholeData);
    }
    System.out.println("-------------------");
}
/**
 * Swaps each stylesheet link for an inline style element, in place, so that the
 * inclusion order of the styles is preserved. Links whose skip attribute is set
 * to the "true" value are left as they are.
 *
 * @param doc
 *            the html document
 * @param cssContents
 *            the list of external css files with their content
 */
private void internStyles(Document doc, List<ExternalCss> cssContents) {
    Elements links = doc.select(CSS_LINKS_SELECTOR);
    for (Element link : links) {
        if (TRUE_VALUE.equals(link.attr(SKIP_INLINE))) {
            // explicitly excluded from inlining
            continue;
        }
        String path = link.attr(HREF_ATTR);
        Element style = new Element(Tag.valueOf(STYLE_TAG), "");
        DataNode css = new DataNode(getCss(cssContents, path), "");
        style.appendChild(css);
        link.replaceWith(style);
    }
}
// Print the whole data of every data node found under each script tag.
for (Element scriptTag : scriptTags) {
    for (DataNode scriptData : scriptTag.dataNodes()) {
        String wholeData = scriptData.getWholeData();
        System.out.println(wholeData);
    }
}
void insert(Token.Character characterToken) { Node node; // characters in script and style go in as datanodes, not text nodes String tagName = currentElement().tagName(); if (tagName.equals("script") || tagName.equals("style")) node = new DataNode(characterToken.getData(), baseUri); else node = new TextNode(characterToken.getData(), baseUri); currentElement().appendChild(node); // doesn't use insertNode, because we don't foster these; and will always have a stack. }