String candidate; try { candidate = LaxURLCodec.DEFAULT.decode(keyVal[1]); } catch (DecoderException e) { continue;
/** * Get the 'form province' - either the configured (applicableSurtPrefix) * or inferred (full current server) range of URIs that is considered * covered by one form login * * @param curi * @return */ protected String getFormProvince(CrawlURI curi) { if (StringUtils.isNotBlank(getApplicableSurtPrefix())) { return getApplicableSurtPrefix(); } try { return curi.getUURI().resolve("/").getSurtForm(); } catch (URIException e) { logger.log(Level.WARNING,"error trimming to root",e); return curi.getClassKey(); // should never happen } }
res = (String) TextUtils.unescapeHtml(res); if (codebaseURI != null) { res = codebaseURI.resolve(res).toString();
/**
 * Canonicalize the given URL by running two canonicalizers in sequence:
 * first {@code basic}, then {@code ia}. The order is significant.
 *
 * @param url the URL handed to both canonicalizers (presumably modified
 *            in place, since nothing is returned -- confirm against
 *            the canonicalizer implementations)
 */
public void canonicalize(HandyURL url) {
    // the basic canonicalization first, followed by the IA default stuff:
    basic.canonicalize(url);
    ia.canonicalize(url);
}
}
/**
 * Return the escaped URI string, caching it after the first call.
 *
 * <p>On a cache miss the value is taken from {@code super.getEscapedURI()}
 * and {@code coalesceUriStrings()} is invoked before returning.
 *
 * @return the cached escaped URI (whatever the superclass produced)
 */
public synchronized String getEscapedURI() {
    if (this.cachedEscapedURI != null) {
        // already computed on an earlier call
        return this.cachedEscapedURI;
    }
    this.cachedEscapedURI = super.getEscapedURI();
    coalesceUriStrings();
    return this.cachedEscapedURI;
}
/**
 * Create a {@code UsableURI} from a string, delegating to
 * {@code create(String, ...)} with the charset returned by
 * {@code UsableURI.getDefaultProtocolCharset()}.
 *
 * @param uri URI as string.
 * @return Instance of UsableURI.
 * @throws URIException as declared by the two-argument {@code create}
 */
protected UsableURI create(String uri) throws URIException {
    return create(uri, UsableURI.getDefaultProtocolCharset());
}
/**
 * Canonicalize the given URL by running two canonicalizers in sequence:
 * first {@code google}, then {@code ia}. The order is significant.
 *
 * @param url the URL handed to both canonicalizers (presumably modified
 *            in place, since nothing is returned -- confirm against
 *            the canonicalizer implementations)
 */
public void canonicalize(HandyURL url) {
    // just google's stuff, followed by the IA default stuff:
    google.canonicalize(url);
    ia.canonicalize(url);
}
}
public synchronized String getHost() throws URIException { if (this.cachedHost == null) { // If this._host is null, 3.0 httpclient throws // illegalargumentexception. Don't go there. if (this._host != null) { this.cachedHost = super.getHost(); coalesceHostAuthorityStrings(); } } return this.cachedHost; }
/**
 * Canonicalize the given URL by running two canonicalizers in sequence:
 * first {@code basic}, then {@code ia}. The order is significant.
 *
 * @param url the URL handed to both canonicalizers (presumably modified
 *            in place, since nothing is returned -- confirm against
 *            the canonicalizer implementations)
 */
public void canonicalize(HandyURL url) {
    // the basic canonicalization first, followed by the IA default stuff:
    basic.canonicalize(url);
    ia.canonicalize(url);
}
}
// Resolve "/robots.txt" against this URI and register the resulting
// string as a prerequisite of curi.
String prereq = curi.getUURI().resolve("/robots.txt").toString();
curi.markPrerequisite(prereq);
/**
 * Return the escaped URI string, caching it after the first call.
 *
 * <p>On a cache miss the value is taken from {@code super.getEscapedURI()}
 * and {@code coalesceUriStrings()} is invoked before returning.
 *
 * @return the cached escaped URI (whatever the superclass produced)
 */
public synchronized String getEscapedURI() {
    if (this.cachedEscapedURI != null) {
        // already computed on an earlier call
        return this.cachedEscapedURI;
    }
    this.cachedEscapedURI = super.getEscapedURI();
    coalesceUriStrings();
    return this.cachedEscapedURI;
}
/**
 * Create a {@code UsableURI} from a string, delegating to
 * {@code create(String, ...)} with the charset returned by
 * {@code UsableURI.getDefaultProtocolCharset()}.
 *
 * @param uri URI as string.
 * @return Instance of UsableURI.
 * @throws URIException as declared by the two-argument {@code create}
 */
protected UsableURI create(String uri) throws URIException {
    return create(uri, UsableURI.getDefaultProtocolCharset());
}
/**
 * Canonicalize the given URL by running two canonicalizers in sequence:
 * first {@code google}, then {@code ia}. The order is significant.
 *
 * @param url the URL handed to both canonicalizers (presumably modified
 *            in place, since nothing is returned -- confirm against
 *            the canonicalizer implementations)
 */
public void canonicalize(HandyURL url) {
    // just google's stuff, followed by the IA default stuff:
    google.canonicalize(url);
    ia.canonicalize(url);
}
}
public synchronized String getHost() throws URIException { if (this.cachedHost == null) { // If this._host is null, 3.0 httpclient throws // illegalargumentexception. Don't go there. if (this._host != null) { this.cachedHost = super.getHost(); coalesceHostAuthorityStrings(); } } return this.cachedHost; }
retVal = LaxURLCodec.DEFAULT.decode(retVal); } catch (DecoderException e) { LOGGER.log(Level.INFO,"unable to decode",e);
/**
 * Canonicalize the given URL by running two canonicalizers in sequence:
 * first {@code basic}, then {@code ia}. The order is significant.
 *
 * @param url the URL handed to both canonicalizers (presumably modified
 *            in place, since nothing is returned -- confirm against
 *            the canonicalizer implementations)
 */
public void canonicalize(HandyURL url) {
    basic.canonicalize(url);
    ia.canonicalize(url);
}
}
res = StringEscapeUtils.unescapeHtml(res); if (codebaseURI != null) { res = codebaseURI.resolve(res).toString();
/**
 * Canonicalize the given URL by running two canonicalizers in sequence:
 * first {@code basic}, then {@code ia}. The order is significant.
 *
 * @param url the URL handed to both canonicalizers (presumably modified
 *            in place, since nothing is returned -- confirm against
 *            the canonicalizer implementations)
 */
public void canonicalize(HandyURL url) {
    basic.canonicalize(url);
    ia.canonicalize(url);
}
}
/**
 * Canonicalize the given URL by running two canonicalizers in sequence:
 * first {@code basic}, then {@code ia}. The order is significant.
 *
 * @param url the URL handed to both canonicalizers (presumably modified
 *            in place, since nothing is returned -- confirm against
 *            the canonicalizer implementations)
 */
public void canonicalize(HandyURL url) {
    // the basic canonicalization first, followed by the IA default stuff:
    basic.canonicalize(url);
    ia.canonicalize(url);
}
}
/**
 * Canonicalize the given URL by running two canonicalizers in sequence:
 * first {@code basic}, then {@code ia}. The order is significant.
 *
 * @param url the URL handed to both canonicalizers (presumably modified
 *            in place, since nothing is returned -- confirm against
 *            the canonicalizer implementations)
 */
public void canonicalize(HandyURL url) {
    basic.canonicalize(url);
    ia.canonicalize(url);
}
}