/**
 * Writes this page, together with all of its images, to the given location.
 * Script elements are removed by default.
 *
 * @param file the file this page is written into
 * @throws IOException if an error occurs
 */
public void save(final File file) throws IOException {
    final XmlSerializer serializer = new XmlSerializer();
    serializer.save(this, file);
}
/**
 * Serializes the document element of the given page and writes it to disk.
 * <p>The target name is the name of {@code file}, with {@code .html} appended unless it
 * already ends with {@code .htm} or {@code .html}. A sibling directory named after the
 * target (without its extension) is stored in {@code outputDir_}.</p>
 *
 * @param page the page to save
 * @param file the requested target file
 * @throws IOException if the target file already exists or an I/O error occurs
 */
public void save(final SgmlPage page, final File file) throws IOException {
    final String requestedName = file.getName();
    final boolean hasHtmlSuffix = requestedName.endsWith(".htm") || requestedName.endsWith(".html");
    final String targetName = hasHtmlSuffix ? requestedName : requestedName + ".html";

    final File parentDir = file.getParentFile();
    final File outputFile = new File(parentDir, targetName);
    if (outputFile.exists()) {
        throw new IOException("File already exists: " + outputFile);
    }

    // targetName always contains a '.' at this point, so lastIndexOf cannot return -1
    final String baseName = targetName.substring(0, targetName.lastIndexOf('.'));
    outputDir_ = new File(parentDir, baseName);

    final String xml = asXml(page.getDocumentElement());
    FileUtils.writeStringToFile(outputFile, xml, ISO_8859_1);
}
/**
 * Appends the XML representation of the given element, and recursively of its child
 * elements, to {@code buffer_}. Nothing is written for an element that
 * {@code isExcluded} rejects.
 *
 * @param node the element to print
 */
protected void printXml(final DomElement node) {
    if (isExcluded(node)) {
        return;
    }

    final boolean hasChildren = node.getFirstChild() != null;
    buffer_.append(indent_).append('<');
    printOpeningTag(node);

    if (!hasChildren && !isEmptyXmlTagExpanded(node)) {
        // childless element that may be written in the short form
        buffer_.append("/>").append('\n');
        return;
    }

    buffer_.append(">").append('\n');

    // Remember this element's own indentation and restore exactly that length after each
    // child. Growing the indent by one character and shrinking it by two would pass a
    // negative length to setLength() for the first child of a top-level element.
    final int ownIndent = indent_.length();
    for (DomNode child = node.getFirstChild(); child != null; child = child.getNextSibling()) {
        indent_.append("  "); // two spaces per nesting level
        if (child instanceof DomElement) {
            printXml((DomElement) child);
        }
        else {
            // non-element children are appended through their string representation
            buffer_.append(child);
        }
        indent_.setLength(ownIndent);
    }
    buffer_.append(indent_).append("</").append(node.getTagName()).append('>').append('\n');
}
/**
 * Appends the XML representation of the given element, and recursively of its child
 * elements, to {@code builder_}. Nothing is written for an element that
 * {@code isExcluded} rejects.
 *
 * @param node the element to print
 * @throws IOException propagated from {@code printOpeningTag} or a nested call
 */
protected void printXml(final DomElement node) throws IOException {
    if (isExcluded(node)) {
        return;
    }

    final boolean hasChildren = node.getFirstChild() != null;
    builder_.append(indent_).append('<');
    printOpeningTag(node);

    if (!hasChildren && !node.isEmptyXmlTagExpanded()) {
        // childless element that may be written in the short form
        builder_.append("/>").append('\n');
        return;
    }

    builder_.append(">").append('\n');

    // Remember this element's own indentation and restore exactly that length after each
    // child. Growing the indent by one character and shrinking it by two would pass a
    // negative length to setLength() for the first child of a top-level element.
    final int ownIndent = indent_.length();
    for (DomNode child = node.getFirstChild(); child != null; child = child.getNextSibling()) {
        indent_.append("  "); // two spaces per nesting level
        if (child instanceof DomElement) {
            printXml((DomElement) child);
        }
        else {
            // non-element children are appended through their string representation
            builder_.append(child);
        }
        indent_.setLength(ownIndent);
    }
    builder_.append(indent_).append("</").append(node.getTagName()).append('>').append('\n');
}
/**
 * Returns the attributes to serialize for the given link element. The linked resource is
 * written to a {@code .css} file obtained from {@code createFile} and the {@code href}
 * attribute is pointed at that saved copy.
 * <p>A link without an {@code href} attribute is returned as a plain copy of its
 * attribute map; nothing is downloaded for it.</p>
 *
 * @param link the link element being serialized
 * @return a copy of the link's attribute map
 * @throws RuntimeException wrapping any exception raised while saving the resource
 */
protected Map<String, DomAttr> getAttributesFor(final HtmlLink link) {
    final Map<String, DomAttr> map = new HashMap<String, DomAttr>(link.getAttributesMap());
    final DomAttr hrefAttr = map.get("href");
    if (hrefAttr == null) {
        // no href: dereferencing the missing attribute would raise a NullPointerException
        return map;
    }

    final String src = hrefAttr.getValue();
    try {
        final File file = createFile(src, ".css");
        FileUtils.writeStringToFile(file, link.getWebResponse(true).getContentAsString());
        // NOTE(review): the map is only a shallow copy, so this appears to modify the same
        // DomAttr instance that the link itself holds - confirm whether that is intended.
        hrefAttr.setValue(outputDir_.getName() + File.separatorChar + file.getName());
    }
    catch (final Exception e) {
        throw new RuntimeException(e);
    }
    return map;
}
/**
 * Serializes an HTML element as XML.
 * <p>When the element is the {@code HtmlHtml} root of an {@code HtmlPage} that reports a
 * page encoding, an XML declaration naming that encoding is written first.</p>
 *
 * @param node a node
 * @return the text representation according to the setting of this serializer
 */
public String asXml(final HtmlElement node) {
    // start from a clean state
    buffer_.setLength(0);
    indent_.setLength(0);

    final String encoding;
    if (node.getPage() instanceof HtmlPage) {
        encoding = node.getPage().getPageEncoding();
    }
    else {
        encoding = null;
    }

    final boolean needsDeclaration = encoding != null && node instanceof HtmlHtml;
    if (needsDeclaration) {
        buffer_.append("<?xml version=\"1.0\" encoding=\"")
            .append(encoding)
            .append("\"?>")
            .append('\n');
    }

    printXml(node);

    final String xml = buffer_.toString();
    buffer_.setLength(0);
    return xml;
}
/**
 * Writes the part of an opening tag that sits between "&lt;" and "&gt;" (or "/&gt;"):
 * the tag name followed by every attribute as {@code name="value"}, with the value
 * XML-escaped.
 *
 * @param node the node whose opening tag is to be printed
 */
protected void printOpeningTag(final DomElement node) {
    buffer_.append(node.getTagName());

    // scripts, images and links get their attributes from dedicated overloads
    final Map<String, DomAttr> attributes;
    if (node instanceof HtmlScript) {
        attributes = getAttributesFor((HtmlScript) node);
    }
    else if (node instanceof HtmlImage) {
        attributes = getAttributesFor((HtmlImage) node);
    }
    else if (node instanceof HtmlLink) {
        attributes = getAttributesFor((HtmlLink) node);
    }
    else {
        attributes = node.getAttributesMap();
    }

    for (final Map.Entry<String, DomAttr> entry : attributes.entrySet()) {
        buffer_.append(" ")
            .append(entry.getKey())
            .append("=\"")
            .append(StringEscapeUtils.escapeXml(entry.getValue().getNodeValue()))
            .append("\"");
    }
}
// Usage fragment: declares `input`; the trailing comment shows a bracketed sample,
// presumably the expected shape of its content (confirm against the serializer's docs).
// NOTE(review): with the fragment collapsed onto one line, everything after `//` is comment
// text, so the XmlSerializer call and the println are not code here. Restore the line breaks
// and assign `input` before it is used.
String input; // [Node(X)][CHILD0(Y)][OBJECT1(A)][Key1(1)][Key2(2)]... String xml = new XmlSerializer(input).toXML(); System.out.println(xml);
protected Map<String, DomAttr> getAttributesFor(final HtmlImage image) throws IOException { final Map<String, DomAttr> map = createAttributesCopyWithClonedAttribute(image, "src"); final DomAttr srcAttr = map.get("src"); if (srcAttr != null && StringUtils.isNotBlank(srcAttr.getValue())) { final WebResponse response = image.getWebResponse(true); final File file = createFile(srcAttr.getValue(), "." + getSuffix(response)); try (InputStream inputStream = response.getContentAsStream()) { FileUtils.copyInputStreamToFile(inputStream, file); } final String valueOnFileSystem = outputDir_.getName() + FILE_SEPARATOR + file.getName(); srcAttr.setValue(valueOnFileSystem); // this is the clone attribute node, not the original one of the page } return map; }
/**
 * Returns a textual representation of this element that represents what would
 * be visible to the user if this page was shown in a web browser. For example,
 * a single-selection select element would return the currently selected value
 * as text.
 * <p>Nodes of an {@code XmlPage} are rendered by an {@code XmlSerializer}, all other
 * nodes by an {@code HtmlSerializer}.</p>
 *
 * @return a textual representation of this element that represents what would
 *         be visible to the user if this page was shown in a web browser
 */
public String asText() {
    final boolean inXmlPage = getPage() instanceof XmlPage;
    if (!inXmlPage) {
        return new HtmlSerializer().asText(this);
    }
    return new XmlSerializer().asText(this);
}
private Map<String, DomAttr> getAttributesFor(final BaseFrameElement frame) throws IOException { final Map<String, DomAttr> map = createAttributesCopyWithClonedAttribute(frame, DomElement.SRC_ATTRIBUTE); final DomAttr srcAttr = map.get(DomElement.SRC_ATTRIBUTE); if (srcAttr == null) { return map; } final Page enclosedPage = frame.getEnclosedPage(); final String suffix = getFileExtension(enclosedPage); final File file = createFile(srcAttr.getValue(), "." + suffix); if (enclosedPage != null) { if (enclosedPage.isHtmlPage()) { file.delete(); // TODO: refactor as it is stupid to create empty file at one place // and then to complain that it already exists ((HtmlPage) enclosedPage).save(file); } else { try (InputStream is = enclosedPage.getWebResponse().getContentAsStream()) { try (OutputStream fos = Files.newOutputStream(file.toPath())) { IOUtils.copyLarge(is, fos); } } } } srcAttr.setValue(file.getParentFile().getName() + FILE_SEPARATOR + file.getName()); return map; }
/**
 * Returns the attributes to serialize for the given link. For a non-blank {@code href}
 * whose request URL uses the {@code http} or {@code https} protocol, the linked content is
 * written to a local {@code .css} file and the (cloned) {@code href} attribute is pointed
 * at that file.
 *
 * @param link the link element being serialized
 * @return the attribute map to write for the link
 * @throws IOException if the linked resource cannot be fetched or stored
 */
protected Map<String, DomAttr> getAttributesFor(final HtmlLink link) throws IOException {
    final Map<String, DomAttr> attributes = createAttributesCopyWithClonedAttribute(link, "href");
    final DomAttr href = attributes.get("href");
    if (href == null || !StringUtils.isNotBlank(href.getValue())) {
        return attributes;
    }

    final String scheme = link.getWebRequest().getUrl().getProtocol();
    final boolean isWebResource = "http".equals(scheme) || "https".equals(scheme);
    if (!isWebResource) {
        return attributes;
    }

    final File target = createFile(href.getValue(), ".css");
    final String content = link.getWebResponse(true, null).getContentAsString();
    FileUtils.writeStringToFile(target, content, ISO_8859_1);
    href.setValue(outputDir_.getName() + FILE_SEPARATOR + target.getName());
    return attributes;
}
/**
 * Appends the XML representation of the given element, and recursively of its child
 * elements, to {@code buffer_}. Nothing is written for an element that
 * {@code isExcluded} rejects.
 *
 * @param node the element to print
 */
protected void printXml(final DomElement node) {
    if (isExcluded(node)) {
        return;
    }

    final boolean hasChildren = node.getFirstChild() != null;
    buffer_.append(indent_).append('<');
    printOpeningTag(node);

    if (!hasChildren && !isEmptyXmlTagExpanded(node)) {
        // childless element that may be written in the short form
        buffer_.append("/>").append('\n');
        return;
    }

    buffer_.append(">").append('\n');

    // Remember this element's own indentation and restore exactly that length after each
    // child. Growing the indent by one character and shrinking it by two would pass a
    // negative length to setLength() for the first child of a top-level element.
    final int ownIndent = indent_.length();
    for (DomNode child = node.getFirstChild(); child != null; child = child.getNextSibling()) {
        indent_.append("  "); // two spaces per nesting level
        if (child instanceof DomElement) {
            printXml((DomElement) child);
        }
        else {
            // non-element children are appended through their string representation
            buffer_.append(child);
        }
        indent_.setLength(ownIndent);
    }
    buffer_.append(indent_).append("</").append(node.getTagName()).append('>').append('\n');
}
/**
 * Appends the XML representation of the given element, and recursively of its child
 * elements, to {@code builder_}. Nothing is written for an element that
 * {@code isExcluded} rejects.
 *
 * @param node the element to print
 * @throws IOException propagated from {@code printOpeningTag} or a nested call
 */
protected void printXml(final DomElement node) throws IOException {
    if (isExcluded(node)) {
        return;
    }

    final boolean hasChildren = node.getFirstChild() != null;
    builder_.append(indent_).append('<');
    printOpeningTag(node);

    if (!hasChildren && !node.isEmptyXmlTagExpanded()) {
        // childless element that may be written in the short form
        builder_.append("/>").append('\n');
        return;
    }

    builder_.append('>').append('\n');

    // Remember this element's own indentation and restore exactly that length after each
    // child. Growing the indent by one character and shrinking it by two would pass a
    // negative length to setLength() for the first child of a top-level element.
    final int ownIndent = indent_.length();
    for (DomNode child = node.getFirstChild(); child != null; child = child.getNextSibling()) {
        indent_.append("  "); // two spaces per nesting level
        if (child instanceof DomElement) {
            printXml((DomElement) child);
        }
        else {
            // non-element children are appended through their string representation
            builder_.append(child);
        }
        indent_.setLength(ownIndent);
    }
    builder_.append(indent_).append("</").append(node.getTagName()).append('>').append('\n');
}
/**
 * Returns the attributes to serialize for the given link element. The linked resource is
 * written to a {@code .css} file obtained from {@code createFile} and the {@code href}
 * attribute is pointed at that saved copy.
 * <p>A link without an {@code href} attribute is returned as a plain copy of its
 * attribute map; nothing is downloaded for it.</p>
 *
 * @param link the link element being serialized
 * @return a copy of the link's attribute map
 * @throws RuntimeException wrapping any exception raised while saving the resource
 */
protected Map<String, DomAttr> getAttributesFor(final HtmlLink link) {
    final Map<String, DomAttr> map = new HashMap<String, DomAttr>(link.getAttributesMap());
    final DomAttr hrefAttr = map.get("href");
    if (hrefAttr == null) {
        // no href: dereferencing the missing attribute would raise a NullPointerException
        return map;
    }

    final String src = hrefAttr.getValue();
    try {
        final File file = createFile(src, ".css");
        FileUtils.writeStringToFile(file, link.getWebResponse(true).getContentAsString());
        // NOTE(review): the map is only a shallow copy, so this appears to modify the same
        // DomAttr instance that the link itself holds - confirm whether that is intended.
        hrefAttr.setValue(outputDir_.getName() + File.separatorChar + file.getName());
    }
    catch (final Exception e) {
        throw new RuntimeException(e);
    }
    return map;
}
/**
 * Serializes an HTML element as XML.
 * <p>When the element is the {@code HtmlHtml} root of an {@code HtmlPage} that reports a
 * page encoding, an XML declaration naming that encoding is written first.</p>
 *
 * @param node a node
 * @return the text representation according to the setting of this serializer
 */
public String asXml(final HtmlElement node) {
    // start from a clean state
    buffer_.setLength(0);
    indent_.setLength(0);

    final String encoding;
    if (node.getPage() instanceof HtmlPage) {
        encoding = node.getPage().getPageEncoding();
    }
    else {
        encoding = null;
    }

    final boolean needsDeclaration = encoding != null && node instanceof HtmlHtml;
    if (needsDeclaration) {
        buffer_.append("<?xml version=\"1.0\" encoding=\"")
            .append(encoding)
            .append("\"?>")
            .append('\n');
    }

    printXml(node);

    final String xml = buffer_.toString();
    buffer_.setLength(0);
    return xml;
}
/**
 * Determines the attributes to serialize for the given element.
 * <p>Images, links and frames are handled by their dedicated {@code getAttributesFor}
 * overloads. For an option, a copy of the attribute map is adjusted so that the
 * {@code selected} attribute is present exactly when {@code isSelected()} is true.
 * Every other element yields its own attribute map.</p>
 *
 * @param node the element being serialized
 * @return the attribute map to write for the element
 * @throws IOException propagated from the {@code getAttributesFor} overloads
 */
private Map<String, DomAttr> readAttributes(final DomElement node) throws IOException {
    if (node instanceof HtmlImage) {
        return getAttributesFor((HtmlImage) node);
    }
    if (node instanceof HtmlLink) {
        return getAttributesFor((HtmlLink) node);
    }
    if (node instanceof BaseFrameElement) {
        return getAttributesFor((BaseFrameElement) node);
    }

    final Map<String, DomAttr> original = node.getAttributesMap();
    if (!(node instanceof HtmlOption)) {
        return original;
    }

    // work on a copy so that the adjustment does not touch the element's own map
    final Map<String, DomAttr> optionAttributes = new HashMap<>(original);
    final boolean selected = ((HtmlOption) node).isSelected();
    if (!selected) {
        optionAttributes.remove("selected");
    }
    else if (!optionAttributes.containsKey("selected")) {
        optionAttributes.put("selected", new DomAttr(node.getPage(), null, "selected", "selected", false));
    }
    return optionAttributes;
}
// Fragment: constructs an XmlSerializer for `type` and declares `result` without assigning it.
// NOTE(review): `type` is defined outside this fragment, and nothing here uses `serializer`
// or `result` yet.
var serializer = new XmlSerializer(type); object result;
protected Map<String, DomAttr> getAttributesFor(final HtmlImage image) throws IOException { final Map<String, DomAttr> map = createAttributesCopyWithClonedAttribute(image, DomElement.SRC_ATTRIBUTE); final DomAttr srcAttr = map.get(DomElement.SRC_ATTRIBUTE); if (srcAttr != null && StringUtils.isNotBlank(srcAttr.getValue())) { final WebResponse response = image.getWebResponse(true); final File file = createFile(srcAttr.getValue(), "." + getSuffix(response)); try (InputStream inputStream = response.getContentAsStream()) { FileUtils.copyInputStreamToFile(inputStream, file); } final String valueOnFileSystem = outputDir_.getName() + FILE_SEPARATOR + file.getName(); srcAttr.setValue(valueOnFileSystem); // this is the clone attribute node, not the original one of the page } return map; }
/**
 * Returns a textual representation of this element that represents what would
 * be visible to the user if this page was shown in a web browser. For example,
 * a single-selection select element would return the currently selected value
 * as text.
 * <p>Nodes of an {@code XmlPage} are rendered by an {@code XmlSerializer}, all other
 * nodes by an {@code HtmlSerializer}.</p>
 *
 * @return a textual representation of this element that represents what would
 *         be visible to the user if this page was shown in a web browser
 */
public String asText() {
    final boolean inXmlPage = getPage() instanceof XmlPage;
    if (!inXmlPage) {
        return new HtmlSerializer().asText(this);
    }
    return new XmlSerializer().asText(this);
}