org.htmlcleaner.TagNode.getText java code examples

/**
 * Converts an arbitrary value into its textual form.
 *
 * @param o the value to convert; may be {@code null}
 * @return the text content when {@code o} is a {@code TagNode}, an empty string when
 *         {@code o} is {@code null}, otherwise the result of {@code o.toString()}
 */
private String toText(Object o) {
  // instanceof is false for null, so the null case falls through to the ternary below.
  if (o instanceof TagNode) {
    final TagNode node = (TagNode) o;
    return node.getText().toString();
  }
  return (o == null) ? "" : o.toString();
}

/**
 * Produces a string for the given value.
 *
 * @param o value to render; {@code null} is allowed
 * @return {@code ""} for {@code null}; the node text for a {@code TagNode};
 *         {@code o.toString()} for anything else
 */
private String toText(Object o) {
  if (o == null) {
    return "";
  }
  if (!(o instanceof TagNode)) {
    return o.toString();
  }
  final TagNode node = (TagNode) o;
  return node.getText().toString();
}

/**
 * Returns the text of the tag selected by an XPath expression.
 *
 * @param tagNode the node passed to {@code getTagNodeByXpath} as the search root
 * @param xPath   the XPath expression passed to {@code getTagNodeByXpath}
 * @return the text of the node returned by {@code getTagNodeByXpath}, as a string
 */
public static String getText(TagNode tagNode,String xPath){
  final TagNode selected = getTagNodeByXpath(tagNode, xPath);
  final CharSequence content = selected.getText();
  return content.toString();
}
/**

/**
 * Concatenates the content of every {@code ContentNode} child and, recursively,
 * the text of every {@code TagNode} child, in child order.
 *
 * @return text content of this node and its sub-elements
 */
public CharSequence getText() {
  final StringBuilder buffer = new StringBuilder();
  for (final Object child : children) {
    if (child instanceof ContentNode) {
      final ContentNode content = (ContentNode) child;
      buffer.append(content.getContent());
      continue;
    }
    if (child instanceof TagNode) {
      // Recurse into the nested element and append whatever text it holds.
      final CharSequence nested = ((TagNode) child).getText();
      buffer.append(nested);
    }
  }
  return buffer;
}

/**
 * Builds the text of this node from its children: content nodes contribute their
 * content, tag nodes contribute their own (recursively computed) text. Children of
 * any other type are skipped.
 *
 * @return text content of this node and its sub-elements
 */
public CharSequence getText() {
  StringBuilder collected = new StringBuilder();
  for (Object node : children) {
    final boolean isContent = node instanceof ContentNode;
    if (!isContent && !(node instanceof TagNode)) {
      continue;
    }
    if (isContent) {
      collected.append(((ContentNode) node).getContent());
    } else {
      CharSequence inner = ((TagNode) node).getText();
      collected.append(inner);
    }
  }
  return collected;
}

/**
 * Extracts the inner text of the first tag found by <CODE>tagName</CODE>.
 * <P>
 * If tagName is null or empty, then the root element is used.
 * </P>
 * @param html the markup to search; when {@code null} the method returns {@code null}
 * @param tagName the name of the tag including the root or null/empty for root tag
 * @return {@code null} when {@code html} is {@code null}; an empty string when
 *         {@code getTargetTagNode} yields no node; otherwise that node's text
 */
public static String getInnerText(String html, String tagName) {
  if (html == null) {
    return null;
  }

  final TagNode match = getTargetTagNode(html, tagName);
  return (match == null) ? "" : match.getText().toString();
}

/**
 * Writes the closing tag of {@code tagNode} to {@code writer}.
 * <p>
 * Nothing is written for tags rejected by {@code isForbiddenTag}. For tags whose
 * content is not escaped ({@code dontEscape}), the safe CDATA end marker is written
 * before the closing tag unless the trimmed text already ends with it; a newline is
 * inserted ahead of the marker when the text is non-empty and does not already end
 * with a line break.
 * </p>
 *
 * @param tagNode the node whose end tag is serialized
 * @param writer  destination for the output
 * @param newLine whether to write a trailing {@code "\n"} after the closing tag
 * @throws IOException if writing to {@code writer} fails
 */
protected void serializeEndTag(TagNode tagNode, Writer writer, boolean newLine) throws IOException {
  if (isForbiddenTag(tagNode)) {
    return;
  }
  String tagName = tagNode.getName();
  if (dontEscape(tagNode)) {
    // Compute the node text once: every getText() call rebuilds the text from the
    // node's children, and the original code repeated that call up to five times.
    final String content = tagNode.getText().toString();
    // because we are not considering if the file is xhtml or html,
    // we need to put a javascript comment in front of the CDATA in case this is NOT xhtml
    if (!content.trim().endsWith(CData.SAFE_END_CDATA)) {
      // Insert a newline character before the CDATA end marker if there isn't one
      // already at the end of the tag node content
      if (content.length() > 0) {
        char lastChar = content.charAt(content.length() - 1);
        if (lastChar != '\n' && lastChar != '\r') {
          writer.write("\n");
        }
      }
      // Write the CDATA end marker
      writer.write(CData.SAFE_END_CDATA);
    }
  }
  writer.write("</" + tagName + ">");
  if (newLine) {
    writer.write("\n");
  }
}

/**
 * Emits the closing tag for {@code tagNode}, preceded by the safe CDATA end marker
 * when the tag's content is not escaped and its trimmed text does not already end
 * with that marker. Forbidden tags produce no output.
 *
 * @param tagNode the node whose end tag is serialized
 * @param writer  destination for the output
 * @param newLine whether a {@code "\n"} follows the closing tag
 * @throws IOException if writing to {@code writer} fails
 */
protected void serializeEndTag(TagNode tagNode, Writer writer, boolean newLine) throws IOException {
  if (isForbiddenTag(tagNode)) {
    return;
  }

  final String tagName = tagNode.getName();
  if (dontEscape(tagNode)) {
    // because we are not considering if the file is xhtml or html,
    // we need to put a javascript comment in front of the CDATA in case this is NOT xhtml
    final String trimmedText = tagNode.getText().toString().trim();
    final boolean alreadyClosed = trimmedText.endsWith(SAFE_END_CDATA);
    if (!alreadyClosed) {
      writer.write(SAFE_END_CDATA);
    }
  }

  writer.write("</" + tagName + ">");
  if (newLine) {
    writer.write("\n");
  }
}

/**
 * Evaluates the given XPath against {@code tagNode} and returns the trimmed text of
 * the first result.
 * <p>
 * The first result is not assumed to be a {@code TagNode}: when the expression yields
 * some other value, its {@code toString()} form is used instead of failing with a
 * {@code ClassCastException}.
 * </p>
 *
 * @param tagNode the node the XPath expression is evaluated on
 * @param xpath   the XPath expression
 * @return the trimmed text of the first result, or {@code null} when there is no
 *         result, the first result is {@code null}, or evaluation fails
 */
public static String getTextByXpath(TagNode tagNode, String xpath) {
  try {
    Object[] objs = tagNode.evaluateXPath(xpath);
    if (objs != null && objs.length > 0) {
      Object first = objs[0];
      if (first instanceof TagNode) {
        return ((TagNode) first).getText().toString().trim();
      }
      if (first != null) {
        // Non-node result (e.g. a text or attribute value): use its string form.
        return first.toString().trim();
      }
    }
  } catch (XPatherException e) {
    // Best-effort lookup: report the failure and fall through to the null result.
    e.printStackTrace();
  }
  return null;
}

} else if ( "text".equals(name) ) {
  if (curr instanceof TagNode) {
    result.add( ((TagNode)curr).getText() );
  } else if (curr instanceof String) {
    result.add( curr.toString() );
    Object elem = it.next();
    if (elem instanceof TagNode) {
      result.add( ((TagNode)elem).getText() );
    } else if (elem instanceof String) {
      result.add( elem.toString() );

} else if ("text".equals(name)) {
  if (curr instanceof TagNode) {
    result.add(((TagNode) curr).getText());
  } else if (curr instanceof String) {
    result.add(curr.toString());
    Object elem = it.next();
    if (elem instanceof TagNode) {
      result.add(((TagNode) elem).getText());
    } else if (elem instanceof String) {
      result.add(elem.toString());

if (objs != null && objs.length > 0) {
  TagNode paramTitleNode = (TagNode) objs[0];
  paramTitle = paramTitleNode.getText().toString();
    dlJsonObject.put(childTagTitle.getText().toString().trim(), chileTagValue.getText().toString().trim());

if (!tagNode.getText().toString().startsWith(CData.SAFE_BEGIN_CDATA)) {
  writer.write(CData.SAFE_BEGIN_CDATA);
  if (!tagNode.getText().toString().equals("")) {
    char firstchar = tagNode.getText().toString().charAt(0);
    if (firstchar != '\n' && firstchar != '\r') writer.write("\n");

if (!tagNode.getText().toString().startsWith(SAFE_BEGIN_CDATA)) {
  writer.write(SAFE_BEGIN_CDATA);

for (Object object : evaluateXPath) {
  TagNode trNode = (TagNode)object;
  if(!trNode.getText().toString().trim().equals("")){//把tr为空的标签过滤掉
    JSONObject jsonObject2 = new JSONObject();
    Object[] evaluateXPath2 = trNode.evaluateXPath("//th");
      jsonObject2.put("value", thNode.getText().toString());
    }else{
      evaluateXPath2 = trNode.evaluateXPath("//td");
      jsonObject2.put("name", tdNode1.getText().toString());
      jsonObject2.put("value", tdNode2.getText().toString());

// Take the node's text as a string; the trailing comment shows a sample value.
String text = tagNode.getText().toString(); // "\n\n1\n/100\n"
// Prepare a matcher for runs of two or three consecutive ASCII digits in that text
// (in the sample above, "100" qualifies while the single digit "1" does not).
Pattern pattern = Pattern.compile("[0-9]{2,3}");
Matcher matcher = pattern.matcher(text);

  /**
   * Reports whether {@code node} has no meaningful content.
   * <p>
   * A node is considered non-empty as soon as one of its children is a tag whose name
   * matches (ignoring case) an entry of {@code VALID_ELEMENTS}, a tag whose text
   * contains at least one character that is not invisible, a tag that is itself
   * non-empty, or a content node with at least one character that is not invisible.
   * </p>
   *
   * @param node the node whose children are inspected
   * @return {@code true} when no child satisfies any of the conditions above
   */
  private boolean isEmpty(final TagNode node) {
    final CharMatcher visible = CharMatcher.invisible().negate();
    for (BaseToken child : node.getAllChildren()) {
      if (child instanceof TagNode) {
        final TagNode tag = (TagNode) child;
        // Checks run in the same order as a short-circuiting || chain.
        for (String validName : VALID_ELEMENTS) {
          if (tag.getName().equalsIgnoreCase(validName)) {
            return false;
          }
        }
        if (visible.matchesAnyOf(tag.getText())) {
          return false;
        }
        if (!isEmpty(tag)) {
          return false;
        }
      } else if (child instanceof ContentNode) {
        final ContentNode content = (ContentNode) child;
        if (visible.matchesAnyOf(content.getContent())) {
          return false;
        }
      }
    }
    return true;
  }
}

    documentPath);
final ContentNode textContent = new ContentNode(anchorTag.getText().toString());
anchorTag.getParent().insertChildAfter(anchorTag, textContent);
anchorTag.getParent().removeChild(anchorTag);

CharSequence contentString = tagNode.getText();
if(isEmptyString(contentString)) {

CharSequence contentString = tagNode.getText();
if (isEmptyString(contentString)) {

Popular methods of TagNode

getAttributeByName
getName
getAttributes
getAllChildren
addAttribute
evaluateXPath
Evaluates XPath expression on the given node. This is not a fully supported XPath parser and evaluator. Ex
hasAttribute
removeAttribute
removeChild
getElementsByName
<init>
findElementByName

Popular in Java

Reading from database using SQL prepared statement
startActivity (Activity)
notifyDataSetChanged (ArrayAdapter)
onRequestPermissionsResult (Fragment)
InputStreamReader (java.io)
A class for turning a byte stream into a character stream. Data read from the source input stream is
DecimalFormat (java.text)
A concrete subclass of NumberFormat that formats decimal numbers. It has a variety of features desig
SortedSet (java.util)
SortedSet is a Set which iterates over its elements in a sorted order. The order is determined eithe
IOUtils (org.apache.commons.io)
General IO stream manipulation utilities. This class provides static utility methods for input/outpu
BufferedImage (java.awt.image)
The BufferedImage subclass describes an java.awt.Image with an accessible buffer of image data. All
Reference (javax.naming)
Best IntelliJ plugins

How to use the getText method in org.htmlcleaner.TagNode

Best Java code snippets using org.htmlcleaner.TagNode.getText (Showing top 20 results out of 315)

How to use the getText method in org.htmlcleaner.TagNode