/**
 * Converts an arbitrary value to its textual form.
 *
 * @param o the value to convert; may be {@code null}
 * @return the empty string for {@code null}, the node text for a {@code TagNode},
 *         otherwise the result of {@code o.toString()}
 */
private String toText(Object o) {
    if (o == null) {
        return "";
    }
    if (o instanceof TagNode) {
        TagNode node = (TagNode) o;
        return node.getText().toString();
    }
    return o.toString();
}
/**
 * Renders the given object as text.
 *
 * @param o the object to render; {@code null} is allowed
 * @return {@code ""} when {@code o} is {@code null}; the text of the node when
 *         {@code o} is a {@code TagNode}; {@code o.toString()} in every other case
 */
private String toText(Object o) {
    final String text;
    if (o == null) {
        text = "";
    } else if (o instanceof TagNode) {
        text = ((TagNode) o).getText().toString();
    } else {
        text = o.toString();
    }
    return text;
}
/**
 * Gets the value (text) of the specified tag.
 *
 * @param tagNode the node handed to {@code getTagNodeByXpath} together with {@code xPath}
 * @param xPath   the XPath expression handed to {@code getTagNodeByXpath}
 * @return the text of the node returned by {@code getTagNodeByXpath}, as a string
 */
public static String getText(TagNode tagNode, String xPath) {
    CharSequence text = getTagNodeByXpath(tagNode, xPath).getText();
    return text.toString();
}

/**
/**
 * @return Text content of this node and its subelements.
 */
public CharSequence getText() {
    StringBuilder buffer = new StringBuilder();
    for (Object child : children) {
        if (child instanceof ContentNode) {
            // Plain content contributes its own text.
            buffer.append(((ContentNode) child).getContent());
        } else if (child instanceof TagNode) {
            // Nested tags contribute the text of their whole subtree.
            buffer.append(((TagNode) child).getText());
        }
    }
    return buffer;
}
/**
 * @return Text content of this node and its subelements.
 */
public CharSequence getText() {
    final StringBuilder collected = new StringBuilder();
    for (Object node : children) {
        if (node instanceof ContentNode) {
            ContentNode content = (ContentNode) node;
            collected.append(content.getContent());
            continue;
        }
        if (node instanceof TagNode) {
            // Recurse so the text of nested elements is included as well.
            TagNode nested = (TagNode) node;
            collected.append(nested.getText());
        }
        // Children of any other kind contribute no text.
    }
    return collected;
}
/**
 * Extracts the inner text of the tag that {@code getTargetTagNode} resolves for
 * <CODE>tagName</CODE>.
 * <P>
 * If tagName is null or empty, then the root element is used.
 * </P>
 *
 * @param html    the markup to search; may be {@code null}
 * @param tagName the name of the tag including the root or null/empty for root tag
 * @return {@code null} when {@code html} is {@code null}; the empty string when no
 *         target node is resolved; otherwise the text of the target node
 */
public static String getInnerText(String html, String tagName) {
    if (html == null) {
        return null;
    }
    TagNode targetNode = getTargetTagNode(html, tagName);
    return targetNode == null ? "" : targetNode.getText().toString();
}
protected void serializeEndTag(TagNode tagNode, Writer writer, boolean newLine) throws IOException { if (!isForbiddenTag(tagNode)) { String tagName = tagNode.getName(); if (dontEscape(tagNode)) { // because we are not considering if the file is xhtml or html, // we need to put a javascript comment in front of the CDATA in case this is NOT xhtml if (!tagNode.getText().toString().trim().endsWith(CData.SAFE_END_CDATA)) { // // Insert a newline character before the CDATA end marker if there isn't one // already at the end of the tag node content // if (tagNode.getText().toString().length() > 0) { char lastchar = tagNode.getText().toString().charAt(tagNode.getText().toString().length() - 1); if (lastchar != '\n' && lastchar != '\r') writer.write("\n"); } // Write the CDATA end marker writer.write(CData.SAFE_END_CDATA); } } writer.write("</" + tagName + ">"); if (newLine) { writer.write("\n"); } } }
protected void serializeEndTag(TagNode tagNode, Writer writer, boolean newLine) throws IOException { if ( !isForbiddenTag(tagNode)) { String tagName = tagNode.getName(); if (dontEscape(tagNode)) { // because we are not considering if the file is xhtml or html, // we need to put a javascript comment in front of the CDATA in case this is NOT xhtml if (!tagNode.getText().toString().trim().endsWith(SAFE_END_CDATA)) { writer.write(SAFE_END_CDATA); } } writer.write( "</" + tagName + ">" ); if (newLine) { writer.write("\n"); } } }
/**
 * Selects the text of the first result that the given XPath yields on {@code tagNode}.
 *
 * @param tagNode the node on which the XPath expression is evaluated
 * @param xpath   the XPath expression to evaluate
 * @return the trimmed text of the first result, or {@code null} when the expression
 *         yields nothing, the first result is {@code null}, or evaluation fails
 */
public static String getTextByXpath(TagNode tagNode, String xpath) {
    try {
        Object[] matches = tagNode.evaluateXPath(xpath);
        if (matches == null || matches.length == 0) {
            return null;
        }

        Object first = matches[0];
        if (first instanceof TagNode) {
            return ((TagNode) first).getText().toString().trim();
        }
        // The result array is Object[], so an element is not guaranteed to be a TagNode;
        // use its string form instead of failing with a ClassCastException.
        return first == null ? null : first.toString().trim();
    } catch (XPatherException e) {
        // Best-effort lookup: report the failure and fall through to the null result.
        e.printStackTrace();
    }
    return null;
}
} else if ( "text".equals(name) ) { if (curr instanceof TagNode) { result.add( ((TagNode)curr).getText() ); } else if (curr instanceof String) { result.add( curr.toString() ); Object elem = it.next(); if (elem instanceof TagNode) { result.add( ((TagNode)elem).getText() ); } else if (elem instanceof String) { result.add( elem.toString() );
} else if ("text".equals(name)) { if (curr instanceof TagNode) { result.add(((TagNode) curr).getText()); } else if (curr instanceof String) { result.add(curr.toString()); Object elem = it.next(); if (elem instanceof TagNode) { result.add(((TagNode) elem).getText()); } else if (elem instanceof String) { result.add(elem.toString());
if (!tagNode.getText().toString().startsWith(SAFE_BEGIN_CDATA)) { writer.write(SAFE_BEGIN_CDATA);
for (Object object : evaluateXPath) { TagNode trNode = (TagNode)object; if(!trNode.getText().toString().trim().equals("")){//把tr为空的标签过滤掉 JSONObject jsonObject2 = new JSONObject(); Object[] evaluateXPath2 = trNode.evaluateXPath("//th"); jsonObject2.put("value", thNode.getText().toString()); }else{ evaluateXPath2 = trNode.evaluateXPath("//td"); jsonObject2.put("name", tdNode1.getText().toString()); jsonObject2.put("value", tdNode2.getText().toString());
String text = tagNode.getText().toString(); // "\n\n1\n/100\n" Pattern pattern = Pattern.compile("[0-9]{2,3}"); Matcher matcher = pattern.matcher(text);
/**
 * Checks recursively whether a node has no meaningful content.
 * <p>
 * A node is considered non-empty as soon as one of its children is a tag whose name
 * matches an entry of {@code VALID_ELEMENTS} (ignoring case), a tag or content node
 * whose text contains at least one character that is not invisible, or a tag that is
 * itself non-empty.
 *
 * @param node the node whose children are inspected
 * @return {@code true} if no child makes the node non-empty, {@code false} otherwise
 */
private boolean isEmpty(final TagNode node) {
    final CharMatcher visible = CharMatcher.invisible().negate();

    for (BaseToken child : node.getAllChildren()) {
        if (child instanceof TagNode) {
            final TagNode element = (TagNode) child;

            // A child element with a valid name always counts as content.
            for (String validName : VALID_ELEMENTS) {
                if (element.getName().equalsIgnoreCase(validName)) {
                    return false;
                }
            }
            if (visible.matchesAnyOf(element.getText())) {
                return false;
            }
            if (!isEmpty(element)) {
                return false;
            }
        } else if (child instanceof ContentNode) {
            if (visible.matchesAnyOf(((ContentNode) child).getContent())) {
                return false;
            }
        }
    }
    return true;
}
}
documentPath); final ContentNode textContent = new ContentNode(anchorTag.getText().toString()); anchorTag.getParent().insertChildAfter(anchorTag, textContent); anchorTag.getParent().removeChild(anchorTag);
CharSequence contentString = tagNode.getText(); if(isEmptyString(contentString)) {
CharSequence contentString = tagNode.getText(); if (isEmptyString(contentString)) {