/**
 * Build a copy of the dummy string in which the interval between two
 * positions is overwritten.
 * <BR>The characters from <code>beginTag</code> (inclusive) up to
 * <code>endTag</code> (exclusive) are replaced by the string obtained from
 * <code>createDummyString ('*', endTag - beginTag)</code> (presumably a run
 * of asterisks of that length); everything before and after the interval
 * is copied unchanged.
 * @param dummyString The string to copy.
 * @param beginTag Index of the first character of the replaced interval.
 * @param endTag Index just past the last character of the replaced interval.
 * @return The leading part, the generated interval and the trailing part
 * joined together.
 */
private static String modifyDummyString (String dummyString, int beginTag, int endTag)
{
    // The interval is generated first, then the two untouched parts are cut out.
    String replacement = createDummyString ('*', endTag - beginTag);
    String leading = dummyString.substring (0, beginTag);
    String trailing = dummyString.substring (endTag, dummyString.length ());

    return leading + replacement + trailing;
}
/**
 * Run the input string through <code>ParserUtils.removeChars</code> for the
 * carriage return, line feed and tab characters, in that order.
 * @param inputString The string to process.
 * @return The result of the last <code>removeChars</code> call.
 */
public static String removeEscapeCharacters (String inputString)
{
    final char[] unwanted = { '\r', '\n', '\t' };
    String cleaned = inputString;

    for (int i = 0; i < unwanted.length; i++)
    {
        cleaned = ParserUtils.removeChars (cleaned, unwanted[i]);
    }

    return cleaned;
}
/**
 * Split the input string into a string array, using the tags as
 * delimiters for the splitting.
 * <BR>This variant takes a NodeFilter as input parameter instead of a
 * tags[] string array, and forwards to the four-argument
 * <code>splitTags</code> with both boolean arguments set to
 * <code>true</code> (they are named <code>recursive</code> and
 * <code>insideTag</code> in the referenced signature).
 * @param input The string to split.
 * @param filter The filter passed on to the four-argument overload.
 * @return The array returned by the four-argument overload.
 * @throws ParserException If the four-argument overload throws it.
 * @throws UnsupportedEncodingException If the four-argument overload throws it.
 * @see ParserUtils#splitTags(String, String[], boolean, boolean)
 */
public static String[] splitTags (String input, NodeFilter filter)
    throws ParserException, UnsupportedEncodingException
{
    String[] pieces = splitTags (input, filter, true, true);
    return pieces;
}
String dummyString = createDummyString (' ', input.length()); NodeList links = getLinks (input, filter, recursive); for (int j=0; j<links.size(); j++) dummyString = modifyDummyString (new String(dummyString), beginTagBegin, endTagEnd); dummyString = modifyDummyString (new String(dummyString), beginTagBegin, endTagBegin); dummyString = modifyDummyString (new String(dummyString), beginTagEnd, endTagEnd);
/**
 * Trim all tags in the input string and return a string like the input
 * one, without the tags and their content.
 * <BR>Forwards to the four-argument <code>trimTags</code> with both
 * boolean arguments (<code>recursive</code> and <code>insideTag</code>)
 * set to <code>true</code>.
 * @param input The string to trim.
 * @param tags The tag names passed on to the four-argument overload.
 * @return The string returned by the four-argument overload.
 * @throws ParserException If the four-argument overload throws it.
 * @throws UnsupportedEncodingException If the four-argument overload throws it.
 * @see ParserUtils#trimTags(String, String[], boolean, boolean)
 */
public static String trimTags (String input, String[] tags)
    throws ParserException, UnsupportedEncodingException
{
    String trimmed = trimTags (input, tags, true, true);
    return trimmed;
}
parser = createParserParsingAnInputString(output); links = parser.extractAllNodesThatMatch(filter);
/**
 * Return the delegate's plain text after passing it through
 * <code>ParserUtils.removeEscapeCharacters</code>.
 * @return The delegate's plain-text string as returned by
 * <code>removeEscapeCharacters</code>.
 */
public String toPlainTextString() { return ParserUtils.removeEscapeCharacters(delegate.toPlainTextString()); } }
String dummyString = createDummyString (' ', input.length()); NodeList links = getLinks (inputModified, tags[i], recursive); for (int j=0; j<links.size(); j++) dummyString = modifyDummyString (new String(dummyString), beginTagBegin, endTagEnd); dummyString = modifyDummyString (new String(dummyString), beginTagBegin, endTagBegin); dummyString = modifyDummyString (new String(dummyString), beginTagEnd, endTagEnd); dummyString = createDummyString (' ', inputModified.length());
/**
 * Trim all tags in the input string and return a string like the input
 * one, without the tags and their content.
 * <BR>Forwards to the four-argument <code>trimTags</code> with both
 * boolean arguments (<code>recursive</code> and <code>insideTag</code>)
 * set to <code>true</code>.
 * @param input The string to trim.
 * @param tags The tag names passed on to the four-argument overload.
 * @return The string returned by the four-argument overload.
 * @throws ParserException If the four-argument overload throws it.
 * @throws UnsupportedEncodingException If the four-argument overload throws it.
 * @see ParserUtils#trimTags(String, String[], boolean, boolean)
 */
public static String trimTags (String input, String[] tags)
    throws ParserException, UnsupportedEncodingException
{
    String trimmed = trimTags (input, tags, true, true);
    return trimmed;
}
parser = createParserParsingAnInputString(output); links = parser.extractAllNodesThatMatch(filter);
String dummyString = createDummyString (' ', input.length()); NodeList links = getLinks (input, filter, recursive); for (int j=0; j<links.size(); j++) dummyString = modifyDummyString (new String(dummyString), beginTagBegin, endTagEnd); dummyString = modifyDummyString (new String(dummyString), beginTagBegin, endTagBegin); dummyString = modifyDummyString (new String(dummyString), beginTagEnd, endTagEnd);
/**
 * Split the input string into a string array, using the tags as
 * delimiters for the splitting.
 * <BR>This variant takes a NodeFilter as input parameter instead of a
 * tags[] string array, and forwards to the four-argument
 * <code>splitTags</code> with both boolean arguments set to
 * <code>true</code> (they are named <code>recursive</code> and
 * <code>insideTag</code> in the referenced signature).
 * @param input The string to split.
 * @param filter The filter passed on to the four-argument overload.
 * @return The array returned by the four-argument overload.
 * @throws ParserException If the four-argument overload throws it.
 * @throws UnsupportedEncodingException If the four-argument overload throws it.
 * @see ParserUtils#splitTags(String, String[], boolean, boolean)
 */
public static String[] splitTags (String input, NodeFilter filter)
    throws ParserException, UnsupportedEncodingException
{
    String[] pieces = splitTags (input, filter, true, true);
    return pieces;
}
/**
 * Trim all tags in the input string and return a string like the input
 * one, without the tags and their content.
 * <BR>This variant takes a NodeFilter as input parameter instead of a
 * tags[] string array, and forwards to the four-argument
 * <code>trimTags</code> with both boolean arguments set to
 * <code>true</code> (they are named <code>recursive</code> and
 * <code>insideTag</code> in the referenced signature).
 * @param input The string to trim.
 * @param filter The filter passed on to the four-argument overload.
 * @return The string returned by the four-argument overload.
 * @throws ParserException If the four-argument overload throws it.
 * @throws UnsupportedEncodingException If the four-argument overload throws it.
 * @see ParserUtils#trimTags(String, String[], boolean, boolean)
 */
public static String trimTags (String input, NodeFilter filter)
    throws ParserException, UnsupportedEncodingException
{
    String trimmed = trimTags (input, filter, true, true);
    return trimmed;
}
/**
 * Run the input string through <code>ParserUtils.removeChars</code> for the
 * carriage return, line feed and tab characters, in that order.
 * @param inputString The string to process.
 * @return The result of the last <code>removeChars</code> call.
 */
public static String removeEscapeCharacters (String inputString)
{
    final char[] unwanted = { '\r', '\n', '\t' };
    String cleaned = inputString;

    for (int i = 0; i < unwanted.length; i++)
    {
        cleaned = ParserUtils.removeChars (cleaned, unwanted[i]);
    }

    return cleaned;
}
NodeFilter filterLink = new TagNameFilter (tag); NodeList links = new NodeList (); parser = createParserParsingAnInputString(output); links = parser.extractAllNodesThatMatch(filterLink);
/**
 * Build a copy of the dummy string in which the interval between two
 * positions is overwritten.
 * <BR>The characters from <code>beginTag</code> (inclusive) up to
 * <code>endTag</code> (exclusive) are replaced by the string obtained from
 * <code>createDummyString ('*', endTag - beginTag)</code> (presumably a run
 * of asterisks of that length); everything before and after the interval
 * is copied unchanged.
 * @param dummyString The string to copy.
 * @param beginTag Index of the first character of the replaced interval.
 * @param endTag Index just past the last character of the replaced interval.
 * @return The leading part, the generated interval and the trailing part
 * joined together.
 */
private static String modifyDummyString (String dummyString, int beginTag, int endTag)
{
    // The interval is generated first, then the two untouched parts are cut out.
    String replacement = createDummyString ('*', endTag - beginTag);
    String leading = dummyString.substring (0, beginTag);
    String trailing = dummyString.substring (endTag, dummyString.length ());

    return leading + replacement + trailing;
}
String output = new String(); String dummyString = createDummyString (' ', input.length()); NodeList links = getLinks (input, filter, recursive); for (int j=0; j<links.size(); j++) dummyString = modifyDummyString (new String(dummyString), beginTagBegin, endTagEnd); dummyString = modifyDummyString (new String(dummyString), beginTagBegin, endTagBegin); dummyString = modifyDummyString (new String(dummyString), beginTagEnd, endTagEnd);
/**
 * Split the input string into a string array, using the tags as
 * delimiters for the splitting.
 * <BR>Forwards to the four-argument <code>splitTags</code> with both
 * boolean arguments (<code>recursive</code> and <code>insideTag</code>)
 * set to <code>true</code>.
 * @param input The string to split.
 * @param tags The tag names passed on to the four-argument overload.
 * @return The array returned by the four-argument overload.
 * @throws ParserException If the four-argument overload throws it.
 * @throws UnsupportedEncodingException If the four-argument overload throws it.
 * @see ParserUtils#splitTags(String, String[], boolean, boolean)
 */
public static String[] splitTags (String input, String[] tags)
    throws ParserException, UnsupportedEncodingException
{
    String[] pieces = splitTags (input, tags, true, true);
    return pieces;
}
/**
 * Trim all tags in the input string and return a string like the input
 * one, without the tags and their content.
 * <BR>This variant takes a NodeFilter as input parameter instead of a
 * tags[] string array, and forwards to the four-argument
 * <code>trimTags</code> with both boolean arguments set to
 * <code>true</code> (they are named <code>recursive</code> and
 * <code>insideTag</code> in the referenced signature).
 * @param input The string to trim.
 * @param filter The filter passed on to the four-argument overload.
 * @return The string returned by the four-argument overload.
 * @throws ParserException If the four-argument overload throws it.
 * @throws UnsupportedEncodingException If the four-argument overload throws it.
 * @see ParserUtils#trimTags(String, String[], boolean, boolean)
 */
public static String trimTags (String input, NodeFilter filter)
    throws ParserException, UnsupportedEncodingException
{
    String trimmed = trimTags (input, filter, true, true);
    return trimmed;
}
/**
 * Extract the link from the HREF attribute.
 * <BR>When the attribute is present, it is passed through
 * <code>ParserUtils.removeChars</code> for the line feed and then the
 * carriage return character. If this tag has a page, the value is then
 * handed to that page's <code>getAbsoluteURL</code>.
 * <BR>NOTE(review): a missing HREF (<code>null</code>) is still passed to
 * <code>getAbsoluteURL</code> when a page exists; confirm that method
 * accepts <code>null</code>.
 * @return The URL from the HREF attribute. This is absolute if the tag has
 * a valid page.
 */
public String extractLink () { String ret; ret = getAttribute ("HREF"); if (null != ret) { ret = ParserUtils.removeChars (ret,'\n'); ret = ParserUtils.removeChars (ret,'\r'); } if (null != getPage ()) ret = getPage ().getAbsoluteURL (ret); return (ret); } }