/**
 * Resets {@code reqURIBuild} for reuse: points its host at {@code HOST:port}
 * and removes every query parameter it currently carries.
 */
private void resetURI() {
    // The port is folded into the host string rather than set separately.
    final String hostAndPort = HOST + ":" + port;
    reqURIBuild.setHost(hostAndPort);
    reqURIBuild.clearParameters();
}
// Parse uriString, capture its query parameters, then render the same URI with the query removed.
// The parameters are read BEFORE clearParameters() mutates the builder, so statement order matters.
// NOTE(review): presumably getQueryParams() returns a snapshot that the later clear does not empty — verify against the URIBuilder version in use.
URIBuilder uriBuilder = new URIBuilder(uriString); List<NameValuePair> urlParameters = uriBuilder.getQueryParams(); String uriWithoutParameters = uriBuilder.clearParameters().toString();
@VisibleForTesting String canonicalizeUrl(String url) { //TODO visit target web page and get header: // <link rel="canonical" href="https://blog.example.com/dresses/" /> String cleaned = url.replaceAll("[\r\n \t]*", ""); try { URI uri = new URI(cleaned); List<NameValuePair> params = URLEncodedUtils.parse(uri, Charsets.UTF_8); List<NameValuePair> cleanedParams = params.stream() .filter(pair -> !pair.getName().startsWith("utm_")) .sorted(Comparator.comparing(NameValuePair::getName) .thenComparing(NameValuePair::getValue)) .collect(toList()); URIBuilder uriBuilder = new URIBuilder(uri); if (cleanedParams.isEmpty()) { uriBuilder.clearParameters(); } else { //set empty list will cause builder always append `?` uriBuilder.setParameters(cleanedParams); } return uriBuilder.build().toString(); } catch (URISyntaxException e) { //ignore } return cleaned; }
// Replace the builder's query with queryParamsList and return the resulting URI as a string.
// NOTE(review): setParameters(...) presumably overwrites existing parameters already, which would make the preceding clearParameters() redundant — confirm before removing.
// NOTE(review): on some URIBuilder versions an empty list may leave a trailing `?` in the output — confirm whether queryParamsList can be empty here.
uriBuilder.clearParameters(); uriBuilder.setParameters(queryParamsList); return uriBuilder.toString();
// Replace the builder's query with queryParamsList and return the resulting URI as a string.
// NOTE(review): setParameters(...) presumably overwrites existing parameters already, which would make the preceding clearParameters() redundant — confirm before removing.
// NOTE(review): on some URIBuilder versions an empty list may leave a trailing `?` in the output — confirm whether queryParamsList can be empty here.
uriBuilder.clearParameters(); uriBuilder.setParameters(queryParamsList); return uriBuilder.toString();