/**
 * Derives the robots.txt URL that corresponds to the given URL.
 *
 * <p>The URL is parsed into a {@code LaxURI} (constructed with its second
 * argument {@code false} -- presumably "not already escaped"; confirm against
 * the LaxURI constructor). Its path is then replaced with
 * {@code /robots.txt} and its query and fragment are cleared; the remaining
 * components are left as parsed.
 *
 * @param url the URL whose robots.txt location is wanted
 * @return the string form of the rewritten URI
 * @throws URIException if the URL cannot be parsed or the URI cannot be
 *         modified
 */
public static String robotsUrlForUrl(String url) throws URIException {
    final LaxURI robotsUri = new LaxURI(url, false);
    robotsUri.setPath("/robots.txt");
    // Drop everything after the path so only <prefix>/robots.txt remains.
    robotsUri.setQuery(null);
    robotsUri.setFragment(null);
    final String robotsUrl = robotsUri.toString();
    return robotsUrl;
}
/**
 * Returns the decoded path-and-query of this URI.
 *
 * <p>Overridden to use this class's static decode().
 *
 * @return the raw path/query decoded with the protocol charset, or
 *         {@code null} when there is no raw path/query
 * @throws URIException if decoding fails
 */
public String getPathQuery() throws URIException {
    final char[] raw = getRawPathQuery();
    if (raw == null) {
        return null;
    }
    return decode(raw, getProtocolCharset());
}
/**
 * Returns the decoded path of this URI.
 *
 * @return the raw path decoded with the protocol charset, or {@code null}
 *         when there is no raw path
 * @throws URIException if decoding fails
 */
public String getPath() throws URIException {
    final char[] rawPath = getRawPath();
    if (rawPath == null) {
        return null;
    }
    return decode(rawPath, getProtocolCharset());
}
if (validate(firstDelimiter, delims)) { if (length >= 2) { char[] lastDelimiter = { tmp.charAt(length - 1) }; if (validate(lastDelimiter, delims)) { tmp = tmp.substring(1, length - 1); length = length - 2; int at = indexFirstOf(tmp, isStartedFromPath ? "/?#" : ":/?#", from); if (at == -1) { at = 0; if (validate(target, scheme)) { _scheme = target; from = ++at; int next = indexFirstOf(tmp, "/?#", at + 2); if (next == -1) { next = (tmp.substring(at + 2).length() == 0) ? at + 2 : tmp.length(); parseAuthority(tmp.substring(at + 2, next), escaped); from = at = next; int next = indexFirstOf(tmp, "?#", from); if (next == -1) { next = tmp.length(); && prevalidate(tmp.substring(from, next), disallowed_rel_path) || escaped
/**
 * Reports whether a robot with the given user-agent may access the given
 * URL.
 *
 * <p>The rules are looked up for the URL and user-agent, and the URL's path
 * (as parsed by {@code LaxURI}) is checked against them.
 *
 * @param url the URL the robot wants to fetch
 * @param userAgent the robot's user-agent string
 * @return {@code true} when the rules do not block the URL's path for the
 *         user-agent, {@code false} when they do
 * @throws IOException propagated from the rules lookup or URL parsing
 * @throws RobotsUnavailableException propagated from the rules lookup
 */
public boolean isRobotPermitted(String url, String userAgent)
        throws IOException, RobotsUnavailableException {
    final RobotRules applicableRules = getRulesForUrl(url, userAgent);
    // Only the path component is matched against the rules.
    final String path = new LaxURI(url, false).getPath();
    final boolean blocked = applicableRules.blocksPathForUA(path, userAgent);
    return !blocked;
}
/**
 * Returns an alternate, functional String representation of this
 * instance: the String form of the URI it represents.
 *
 * <p>The value is computed from {@code super.toString()} on first use and
 * cached in {@code cachedString}; subsequent calls return the cached value.
 *
 * @return the cached string form of this URI
 */
public synchronized String toCustomString() {
    if (this.cachedString != null) {
        return this.cachedString;
    }
    this.cachedString = super.toString();
    coalesceUriStrings();
    // Re-read the field: the value is returned as it stands after coalescing.
    return this.cachedString;
}
/**
 * Returns the decoded form of the whole URI.
 *
 * @return {@code _uri} decoded with the protocol charset, or {@code null}
 *         when {@code _uri} is {@code null}
 * @throws URIException if decoding fails
 */
public String getURI() throws URIException {
    if (_uri == null) {
        return null;
    }
    return decode(_uri, getProtocolCharset());
}
// Build a LaxURI from the url (second constructor argument true --
// presumably "already escaped"; confirm against LaxURI), create the
// ExtendedGetMethod from the same url and the recorder, then set the
// method's URI to the LaxURI instance.
// NOTE(review): setting the URI after construction looks intended to make the
// method use the LaxURI parse rather than one derived from the raw string
// -- confirm against ExtendedGetMethod.
LaxURI lURI = new LaxURI(url,true); getMethod = new ExtendedGetMethod(url,recorder); getMethod.setURI(lURI);
/**
 * Returns the escaped form of this URI.
 *
 * <p>The value is taken from {@code super.getEscapedURI()} on first use and
 * cached in {@code cachedEscapedURI}; subsequent calls return the cached
 * value.
 *
 * @return the cached escaped URI string
 */
public synchronized String getEscapedURI() {
    if (this.cachedEscapedURI != null) {
        return this.cachedEscapedURI;
    }
    this.cachedEscapedURI = super.getEscapedURI();
    coalesceUriStrings();
    // Re-read the field: the value is returned as it stands after coalescing.
    return this.cachedEscapedURI;
}
public synchronized String getHost() throws URIException { if (this.cachedHost == null) { // If this._host is null, 3.0 httpclient throws // illegalargumentexception. Don't go there. if (this._host != null) { this.cachedHost = super.getHost(); coalesceHostAuthorityStrings(); } } return this.cachedHost; }
/**
 * Decodes a URI component given as a character array.
 *
 * <p>The array is converted to a String and handed to the
 * {@code decode(String, String)} overload.
 *
 * @param component the characters to decode; must not be {@code null}
 * @param charset the charset name passed through to the String overload
 * @return the decoded component
 * @throws URIException propagated from the String overload
 * @throws IllegalArgumentException if {@code component} is {@code null}
 */
protected static String decode(char[] component, String charset)
        throws URIException {
    if (component != null) {
        return decode(String.valueOf(component), charset);
    }
    throw new IllegalArgumentException(
            "Component array of chars may not be null");
}
if (validate(firstDelimiter, delims)) { if (length >= 2) { char[] lastDelimiter = { tmp.charAt(length - 1) }; if (validate(lastDelimiter, delims)) { tmp = tmp.substring(1, length - 1); length = length - 2; int at = indexFirstOf(tmp, isStartedFromPath ? "/?#" : ":/?#", from); if (at == -1) { at = 0; if (validate(target, scheme)) { _scheme = target; from = ++at; int next = indexFirstOf(tmp, "/?#", at + 2); if (next == -1) { next = (tmp.substring(at + 2).length() == 0) ? at + 2 : tmp.length(); parseAuthority(tmp.substring(at + 2, next), escaped); from = at = next; int next = indexFirstOf(tmp, "?#", from); if (next == -1) { next = tmp.length(); && prevalidate(tmp.substring(from, next), disallowed_rel_path) || escaped
/**
 * Reports whether a robot with the given user-agent may access the given
 * URL.
 *
 * <p>The rules are looked up for the URL and user-agent, and the URL's path
 * (as parsed by {@code LaxURI}) is checked against them.
 *
 * @param url the URL the robot wants to fetch
 * @param userAgent the robot's user-agent string
 * @return {@code true} when the rules do not block the URL's path for the
 *         user-agent, {@code false} when they do
 * @throws IOException propagated from the rules lookup or URL parsing
 * @throws RobotsUnavailableException propagated from the rules lookup
 */
public boolean isRobotPermitted(String url, String userAgent)
        throws IOException, RobotsUnavailableException {
    final RobotRules applicableRules = getRulesForUrl(url, userAgent);
    // Only the path component is matched against the rules.
    final String path = new LaxURI(url, false).getPath();
    final boolean blocked = applicableRules.blocksPathForUA(path, userAgent);
    return !blocked;
}
/**
 * Returns an alternate, functional String representation of this
 * instance: the String form of the URI it represents.
 *
 * <p>The value is computed from {@code super.toString()} on first use and
 * cached in {@code cachedString}; subsequent calls return the cached value.
 *
 * @return the cached string form of this URI
 */
public synchronized String toCustomString() {
    if (this.cachedString != null) {
        return this.cachedString;
    }
    this.cachedString = super.toString();
    coalesceUriStrings();
    // Re-read the field: the value is returned as it stands after coalescing.
    return this.cachedString;
}
/**
 * Returns the decoded form of the whole URI.
 *
 * @return {@code _uri} decoded with the protocol charset, or {@code null}
 *         when {@code _uri} is {@code null}
 * @throws URIException if decoding fails
 */
public String getURI() throws URIException {
    if (_uri == null) {
        return null;
    }
    return decode(_uri, getProtocolCharset());
}
// Build a LaxURI from the url (second constructor argument true --
// presumably "already escaped"; confirm against LaxURI), create the
// ExtendedGetMethod from the same url and the recorder, then set the
// method's URI to the LaxURI instance.
// NOTE(review): setting the URI after construction looks intended to make the
// method use the LaxURI parse rather than one derived from the raw string
// -- confirm against ExtendedGetMethod.
LaxURI lURI = new LaxURI(url,true); getMethod = new ExtendedGetMethod(url,recorder); getMethod.setURI(lURI);
/**
 * Returns the escaped form of this URI.
 *
 * <p>The value is taken from {@code super.getEscapedURI()} on first use and
 * cached in {@code cachedEscapedURI}; subsequent calls return the cached
 * value.
 *
 * @return the cached escaped URI string
 */
public synchronized String getEscapedURI() {
    if (this.cachedEscapedURI != null) {
        return this.cachedEscapedURI;
    }
    this.cachedEscapedURI = super.getEscapedURI();
    coalesceUriStrings();
    // Re-read the field: the value is returned as it stands after coalescing.
    return this.cachedEscapedURI;
}
public synchronized String getHost() throws URIException { if (this.cachedHost == null) { // If this._host is null, 3.0 httpclient throws // illegalargumentexception. Don't go there. if (this._host != null) { this.cachedHost = super.getHost(); coalesceHostAuthorityStrings(); } } return this.cachedHost; }
/**
 * Decodes a URI component given as a character array.
 *
 * <p>The array is converted to a String and handed to the
 * {@code decode(String, String)} overload.
 *
 * @param component the characters to decode; must not be {@code null}
 * @param charset the charset name passed through to the String overload
 * @return the decoded component
 * @throws URIException propagated from the String overload
 * @throws IllegalArgumentException if {@code component} is {@code null}
 */
protected static String decode(char[] component, String charset)
        throws URIException {
    if (component != null) {
        return decode(String.valueOf(component), charset);
    }
    throw new IllegalArgumentException(
            "Component array of chars may not be null");
}
if (validate(firstDelimiter, delims)) { if (length >= 2) { char[] lastDelimiter = { tmp.charAt(length - 1) }; if (validate(lastDelimiter, delims)) { tmp = tmp.substring(1, length - 1); length = length - 2; int at = indexFirstOf(tmp, isStartedFromPath ? "/?#" : ":/?#", from); if (at == -1) { at = 0; if (validate(target, scheme)) { _scheme = target; from = ++at; int next = indexFirstOf(tmp, "/?#", at + 2); if (next == -1) { next = (tmp.substring(at + 2).length() == 0) ? at + 2 : tmp.length(); parseAuthority(tmp.substring(at + 2, next), escaped); from = at = next; int next = indexFirstOf(tmp, "?#", from); if (next == -1) { next = tmp.length(); && prevalidate(tmp.substring(from, next), disallowed_rel_path) || escaped