org.jsoup.nodes.Document.<init> java code examples

/**
 Render this attribute as HTML; e.g. {@code href="index.html"}.
 <p>The output settings are taken from a throwaway {@link Document} created with an empty base URI.</p>
 @return HTML
 @throws SerializationException if the delegated {@code html(...)} call raises an {@link IOException}
 */
public String html() {
  final StringBuilder sb = new StringBuilder();
  try {
    html(sb, new Document("").outputSettings());
  } catch (IOException e) {
    // Surface the checked exception as the unchecked serialization failure.
    throw new SerializationException(e);
  }
  return sb.toString();
}

/**
 * Makes sure the next element of the iterator is a {@link Document}, because only a document
 * can be selected on.
 * See: https://github.com/code4craft/webmagic/issues/113
 * <p>
 * A non-document element is cloned, the clone is appended to a newly created {@link Document},
 * and that document replaces the element in the list backing the iterator. The new document
 * uses the base URI of the element's owner document; if the element has no owner document,
 * the element's own base URI is used instead of failing with a NullPointerException.
 *
 * @param elementIterator iterator whose next element is checked; it is advanced by one position
 * @return the element itself when it already is a document, otherwise the new wrapping document
 */
private Element checkElementAndConvert(ListIterator<Element> elementIterator) {
  Element element = elementIterator.next();
  if (element instanceof Document) {
    return element;
  }
  // ownerDocument() may be null for an element that is not attached to any document.
  Document owner = element.ownerDocument();
  String baseUri = owner != null ? owner.baseUri() : element.baseUri();
  Document root = new Document(baseUri);
  root.appendChild(element.clone());
  elementIterator.set(root);
  return root;
}

/**
 Build the HTML representation of these attributes.
 @return HTML
 @throws SerializationException if the HTML representation of the attributes cannot be constructed.
 */
public String html() {
  final StringBuilder out = new StringBuilder();
  try {
    // The settings come from a throwaway empty-base-URI Document; slightly odd, but this html() is seldom used.
    html(out, new Document("").outputSettings());
  } catch (IOException ioe) {
    // Not expected to happen.
    throw new SerializationException(ioe);
  }
  return out.toString();
}

/**
 Reset the parse state of this builder for a new input.
 <p>Creates a fresh {@link Document} for {@code baseUri}, a {@code CharacterReader} over
 {@code input}, a {@code Tokeniser} over that reader, and an empty element stack; the current
 token is cleared.</p>
 @param input source to read from; must not be null
 @param baseUri base URI for the new document; must not be null
 @param errors error list stored on this builder and handed to the tokeniser
 @param settings parse settings stored on this builder
 */
protected void initialiseParse(Reader input, String baseUri, ParseErrorList errors, ParseSettings settings) {
  // Fail fast on missing arguments before any state is touched.
  Validate.notNull(input, "String input must not be null");
  Validate.notNull(baseUri, "BaseURI must not be null");

  this.doc = new Document(baseUri);
  this.settings = settings;
  this.reader = new CharacterReader(input);
  this.errors = errors;
  this.currentToken = null;
  this.tokeniser = new Tokeniser(this.reader, errors);
  this.stack = new ArrayList<>(32);
  this.baseUri = baseUri;
}

/**
 * Returns the output settings of the owner document, or those of a new empty-base-URI
 * {@link Document} when there is no owner document.
 *
 * @return the output settings to use; taken from {@code ownerDocument()} when it is non-null
 */
Document.OutputSettings getOutputSettings() {
  final Document ownerDoc = ownerDocument();
  if (ownerDoc == null) {
    return new Document("").outputSettings();
  }
  return ownerDoc.outputSettings();
}

/**
 Build an empty document skeleton that further elements can be added to.
 <p>The result contains an {@code html} element with {@code head} and {@code body} children, in that order.</p>
 @param baseUri baseUri of document; must not be null
 @return document with html, head, and body elements.
 */
public static Document createShell(String baseUri) {
  Validate.notNull(baseUri);

  final Document shell = new Document(baseUri);
  final Element root = shell.appendElement("html");
  root.appendElement("head");
  root.appendElement("body");
  return shell;
}

// Start of parseInputStream (the remainder of the method is not visible in this excerpt).
// A null input yields a new Document for baseUri; otherwise the stream is wrapped before use.
static Document parseInputStream(InputStream input, String charsetName, String baseUri, Parser parser) throws IOException  {
  if (input == null) // empty body
    return new Document(baseUri);
  // NOTE(review): the meaning of the trailing 0 argument is not visible here — confirm against ConstrainableInputStream.wrap.
  input = ConstrainableInputStream.wrap(input, bufferSize, 0);

/**
 * Creates a DesignContext by delegating to the single-argument constructor with a new
 * {@code Document} whose base URI is the empty string.
 */
public DesignContext() {
  this(new Document(""));
}

// Create a document with an empty base URI and give it an "html" document type node
// (the remaining DocumentType constructor arguments are all empty strings).
Document doc = new Document("");
DocumentType docType = new DocumentType("html", "", "", "");
doc.appendChild(docType);

/**
 * Best-effort variant of {@code get(url, trys)} that never throws.
 * <p>
 * Delegates to {@code get(url, 3)}. If that call throws, the failure is reported on standard
 * error (it was previously swallowed silently) and an empty document is returned instead.
 *
 * @param url the URL passed on to the delegated call
 * @return the document returned by the delegated call, or a new {@link Document} with an empty
 *         base URI when the delegated call throws
 */
public static Document get(String url) {
  int trys = 3;
  try {
    return get(url, trys);
  } catch (Exception e) {
    // Keep the best-effort contract, but leave a trace so the failure is diagnosable.
    System.err.println("get failed for " + url + ", returning empty document: " + e);
  }
  // Original note: "after 4 requests, if parsing is still impossible, return an empty document".
  // NOTE(review): trys is 3 here; confirm how get(url, trys) counts attempts.
  return new Document("");
}

/**
 * Best-effort variant of {@code proxyGet(url, trys, ip, port)} that never throws.
 * <p>
 * Delegates with a value of 3 for the second argument. If the delegated call throws, the stack
 * trace is printed and an empty document is returned.
 *
 * @param url  the URL passed on to the delegated call
 * @param ip   proxy address passed on to the delegated call
 * @param port proxy port passed on to the delegated call
 * @return the document from the delegated call, or a new {@link Document} with an empty base
 *         URI when the delegated call throws
 */
public static Document proxyGet(String url, String ip, int port) {
  final int maxTries = 3;
  Document result;
  try {
    result = proxyGet(url, maxTries, ip, port);
  } catch (Exception e) {
    e.printStackTrace();
    // Original note: "after 4 requests, if parsing is still impossible, return an empty document".
    result = new Document("");
  }
  return result;
}

/**
 * Returns the outer HTML of this element.
 * <p>
 * The element is converted with {@code ElementUtil.toJsoup} against a new empty-base-URI
 * {@link Document}, and the outer HTML of the converted node is returned. The conversion
 * walks the element and all of its children recursively, so avoid calling this needlessly.
 *
 * @return the outer HTML for the element
 */
public String getOuterHTML() {
  final Document scratchDocument = new Document("");
  return ElementUtil.toJsoup(scratchDocument, this).outerHtml();
}

/**
 * Parses markup held in a string into a {@link Document}.
 * <p>
 * The text is fed to a {@code SAXParser} whose content handler is a {@code Handler} bound to a
 * new document created for {@code baseUri}; errors go to a {@code LocalErrorHandler}.
 *
 * @param data    the markup to parse
 * @param baseUri base URI for the resulting document
 * @return the document handed to the content handler
 * @throws SAXException if the parser reports one
 * @throws IOException  if reading the input fails
 */
public Document parse(String data, String baseUri) throws SAXException, IOException {
  final InputSource input = new InputSource(new StringReader(data));
  final SAXParser parser = new SAXParser();
  final Document result = new Document(baseUri);

  parser.setContentHandler(new Handler(result));
  parser.setErrorHandler(new LocalErrorHandler());
  parser.parse(input);
  return result;
}

/**
 * Guarantees that the iterator's next element is a {@link Document}, since only a document
 * can be selected on.
 * See: https://github.com/code4craft/webmagic/issues/113
 * <p>
 * When the element is not a document, a clone of it is appended to a new {@link Document} and
 * that document is written back into the iterator's list in place of the element. The base
 * URI comes from the element's owner document, or from the element itself when it has no
 * owner document (previously that case raised a NullPointerException).
 *
 * @param elementIterator iterator whose next element is checked; it is advanced by one position
 * @return the element itself when it already is a document, otherwise the new wrapping document
 */
private Element checkElementAndConvert(ListIterator<Element> elementIterator) {
  Element element = elementIterator.next();
  if (element instanceof Document) {
    return element;
  }
  // ownerDocument() may be null for an element that is not attached to any document.
  Document owner = element.ownerDocument();
  String baseUri = owner != null ? owner.baseUri() : element.baseUri();
  Document root = new Document(baseUri);
  root.appendChild(element.clone());
  elementIterator.set(root);
  return root;
}

/**
 * Converts the iterator's next element into a {@link Document} if it is not one already,
 * since only a document can be selected on.
 * See: https://github.com/code4craft/webmagic/issues/113
 * <p>
 * For a non-document element, a clone is appended to a new {@link Document}, which then
 * replaces the element in the list behind the iterator. The new document's base URI is the
 * owner document's base URI; an element without an owner document contributes its own base
 * URI instead, so the method no longer throws a NullPointerException in that case.
 *
 * @param elementIterator iterator whose next element is checked; it is advanced by one position
 * @return the element itself when it already is a document, otherwise the new wrapping document
 */
private Element checkElementAndConvert(ListIterator<Element> elementIterator) {
  Element element = elementIterator.next();
  if (element instanceof Document) {
    return element;
  }
  // ownerDocument() may be null for an element that is not attached to any document.
  Document owner = element.ownerDocument();
  String baseUri = owner != null ? owner.baseUri() : element.baseUri();
  Document root = new Document(baseUri);
  root.appendChild(element.clone());
  elementIterator.set(root);
  return root;
}

/**
 * Parses markup read from a byte stream into a {@link Document}.
 * <p>
 * The stream is fed to a {@code SAXParser} whose content handler is a {@code Handler} bound to
 * a new document created for {@code baseUri}; errors go to a {@code LocalErrorHandler}.
 *
 * @param data    the byte stream to parse
 * @param baseUri base URI for the resulting document
 * @return the document handed to the content handler
 * @throws SAXException if the parser reports one
 * @throws IOException  if reading the input fails
 */
public Document parse(InputStream data, String baseUri) throws SAXException, IOException {
  final InputSource input = new InputSource(data);
  final SAXParser parser = new SAXParser();
  final Document result = new Document(baseUri);

  parser.setContentHandler(new Handler(result));
  parser.setErrorHandler(new LocalErrorHandler());
  parser.parse(input);
  return result;
}

public Component(Element elem, AttributesRequire attrs) throws Exception {
  Document doc = new Document("");
  doc.appendElement("body");
  doc.body().appendChild(elem);
  renderedElement = renderTemplate(doc, attrs);
}

/**
 * Renders this object as an HTML string.
 * <p>
 * The result of {@code toElement()} is appended to a new empty-base-URI {@link Document};
 * {@code RenderUtil.applyMessages} and {@code RenderUtil.applyClearAction} (with {@code true})
 * are then applied to the document before its HTML is returned.
 *
 * @return the HTML of the wrapping document
 */
public String toHtml() {
  final Document wrapper = new Document("");
  wrapper.appendChild(toElement());

  // Post-process the whole document before serialising it.
  RenderUtil.applyMessages(wrapper);
  RenderUtil.applyClearAction(wrapper, true);

  return wrapper.html();
}

// Start of getBootstrapPage (the remainder of the method is not visible in this excerpt).
// Creates a Document with an empty base URI and an "html" DocumentType whose last
// constructor argument is that document's baseUri().
static Document getBootstrapPage(BootstrapContext context) {
  Document document = new Document("");
  DocumentType doctype = new DocumentType("html", "", "",
      document.baseUri());

/**
 * Cleans up {@code topNode} in place and returns a new document holding clones of its children.
 * <p>
 * The clean-up helpers run in a fixed order on {@code topNode}; afterwards every child node is
 * cloned and appended to a new empty-base-URI {@link Document}. A {@code null} node yields the
 * empty document.
 *
 * @param topNode the node to clean up; may be null
 * @return a new document containing clones of the cleaned node's children, or an empty
 *         document when {@code topNode} is null
 */
static Document postprocess(Element topNode) {
 Log.i("postprocess");
 final Document result = new Document("");
 if (topNode != null) {
  // The order of these passes is kept exactly as in the original implementation.
  removeNodesWithNegativeScores(topNode);
  replaceLineBreaksWithSpaces(topNode);
  removeUnlikelyChildNodes(topNode);
  removeTagsButRetainContent(topNode);
  removeTagsNotLikelyToBeParagraphs(topNode);
  removeTopLevelTagsNotLikelyToBeParagraphs(topNode);
  removeShortParagraphs(topNode);
  removeDisallowedAttributes(topNode);
  for (Node child : topNode.childNodes()) {
   // TODO: Avoid copying each item separately.
   result.appendChild(child.clone());
  }
 }
 return result;
}

Javadoc

Create a new, empty Document.

Popular in Java

Parsing JSON documents to java classes using gson
scheduleAtFixedRate (Timer)
getOriginalFilename (MultipartFile)
Return the original filename in the client's filesystem. This may contain path information depending
getSharedPreferences (Context)
OutputStream (java.io)
A writable sink for bytes. Most clients will use output streams that write data to the file system (
SocketException (java.net)
This SocketException may be thrown during socket creation or setting options, and is the superclass
Selector (java.nio.channels)
A controller for the selection of SelectableChannel objects. Selectable channels can be registered w
NoSuchElementException (java.util)
Thrown when trying to retrieve an element past the end of an Enumeration or Iterator.
XPath (javax.xml.xpath)
XPath provides access to the XPath evaluation environment and expressions. Evaluation of XPath Expr
FlowLayout (java.awt)
A flow layout arranges components in a left-to-right flow, much like lines of text in a paragraph. F
CodeWhisperer alternatives

How to use the org.jsoup.nodes.Document constructor

Best Java code snippets using org.jsoup.nodes.Document.<init> (Showing top 20 results out of 315)

How to use
org.jsoup.nodes.Document
constructor