/**
 * Produces the string form of a capture's file field.
 *
 * @param result capture whose file value is read through {@code getFile()}
 * @return the file value, or {@code DEFAULT_VALUE} when {@code getFile()} yields {@code null}
 */
public String serialize(CaptureSearchResult result) {
    final String fileName = result.getFile();
    if (fileName != null) {
        return fileName;
    }
    // No file recorded for this capture: fall back to the default.
    return DEFAULT_VALUE;
}
} // closes an enclosing scope that was opened outside this excerpt
/**
 * Produces the string form of a capture's file field.
 *
 * @param result capture whose file value is read through {@code getFile()}
 * @return the file value, or {@code DEFAULT_VALUE} when {@code getFile()} yields {@code null}
 */
public String serialize(CaptureSearchResult result) {
    final String fileName = result.getFile();
    if (fileName != null) {
        return fileName;
    }
    // No file recorded for this capture: fall back to the default.
    return DEFAULT_VALUE;
}
} // closes an enclosing scope that was opened outside this excerpt
/**
 * Reports the file of the revisit payload capture.
 *
 * @return {@code revisitPayload.getFile()}, or {@code null} when no revisit payload is set
 */
@Override
public String getDuplicatePayloadFile() {
    if (revisitPayload == null) {
        return null;
    }
    return revisitPayload.getFile();
}
/**
 * Reports the file of the revisit payload capture.
 *
 * @return {@code revisitPayload.getFile()}, or {@code null} when no revisit payload is set
 */
@Override
public String getDuplicatePayloadFile() {
    if (revisitPayload == null) {
        return null;
    }
    return revisitPayload.getFile();
}
/**
 * Filters a capture by whether its file starts with any of the configured prefixes.
 *
 * @param o capture under test; its {@code getFile()} value is compared against each prefix
 * @return when {@code includeMatches} is true, {@code FILTER_INCLUDE} for a prefix hit and
 *         {@code FILTER_EXCLUDE} otherwise; when it is false the two outcomes are swapped
 */
public int filterObject(CaptureSearchResult o) {
    final String capturedFile = o.getFile();
    boolean prefixHit = false;
    for (String candidate : prefixes) {
        if (capturedFile.startsWith(candidate)) {
            // The first matching prefix settles the outcome.
            prefixHit = true;
            break;
        }
    }
    // Include exactly when "matched" agrees with "matches are to be included".
    if (prefixHit == includeMatches) {
        return FILTER_INCLUDE;
    }
    return FILTER_EXCLUDE;
}
/**
 * Filters a capture by whether its file starts with any of the configured prefixes.
 *
 * @param o capture under test; its {@code getFile()} value is compared against each prefix
 * @return when {@code includeMatches} is true, {@code FILTER_INCLUDE} for a prefix hit and
 *         {@code FILTER_EXCLUDE} otherwise; when it is false the two outcomes are swapped
 */
public int filterObject(CaptureSearchResult o) {
    final String capturedFile = o.getFile();
    boolean prefixHit = false;
    for (String candidate : prefixes) {
        if (capturedFile.startsWith(candidate)) {
            // The first matching prefix settles the outcome.
            prefixHit = true;
            break;
        }
    }
    // Include exactly when "matched" agrees with "matches are to be included".
    if (prefixHit == includeMatches) {
        return FILTER_INCLUDE;
    }
    return FILTER_EXCLUDE;
}
/**
 * Includes a capture only if one of the configured patterns is found in its file value.
 *
 * @param o capture under test; its {@code getFile()} value is searched with each pattern
 * @return {@code FILTER_INCLUDE} as soon as any pattern's {@code find()} succeeds,
 *         {@code FILTER_EXCLUDE} when none does
 */
public int filterObject(CaptureSearchResult o) {
    final String capturedFile = o.getFile();
    int verdict = FILTER_EXCLUDE;
    for (Pattern candidate : patterns) {
        if (candidate.matcher(capturedFile).find()) {
            verdict = FILTER_INCLUDE;
            break;
        }
    }
    return verdict;
}
} // closes an enclosing scope that was opened outside this excerpt
/**
 * Includes a capture only if one of the configured patterns is found in its file value.
 *
 * @param o capture under test; its {@code getFile()} value is searched with each pattern
 * @return {@code FILTER_INCLUDE} as soon as any pattern's {@code find()} succeeds,
 *         {@code FILTER_EXCLUDE} when none does
 */
public int filterObject(CaptureSearchResult o) {
    final String capturedFile = o.getFile();
    int verdict = FILTER_EXCLUDE;
    for (Pattern candidate : patterns) {
        if (candidate.matcher(capturedFile).find()) {
            verdict = FILTER_INCLUDE;
            break;
        }
    }
    return verdict;
}
} // closes an enclosing scope that was opened outside this excerpt
/**
 * Applies the embargo date to captures whose file starts with {@code matchPrefix}.
 *
 * @param o capture under test
 * @return {@code FILTER_INCLUDE} when the file does not start with {@code matchPrefix}, or
 *         when the capture date compares as earlier than {@code embargoDate};
 *         {@code FILTER_EXCLUDE} otherwise
 */
public int filterObject(CaptureSearchResult o) {
    if (o.getFile().startsWith(matchPrefix)) {
        // Only prefixed files are subject to the date check.
        final boolean beforeEmbargo = o.getCaptureDate().compareTo(embargoDate) < 0;
        if (!beforeEmbargo) {
            return FILTER_EXCLUDE;
        }
    }
    return FILTER_INCLUDE;
}
/**
 * Applies the embargo date to captures whose file starts with {@code matchPrefix}.
 *
 * @param o capture under test
 * @return {@code FILTER_INCLUDE} when the file does not start with {@code matchPrefix}, or
 *         when the capture date compares as earlier than {@code embargoDate};
 *         {@code FILTER_EXCLUDE} otherwise
 */
public int filterObject(CaptureSearchResult o) {
    if (o.getFile().startsWith(matchPrefix)) {
        // Only prefixed files are subject to the date check.
        final boolean beforeEmbargo = o.getCaptureDate().compareTo(embargoDate) < 0;
        if (!beforeEmbargo) {
            return FILTER_EXCLUDE;
        }
    }
    return FILTER_INCLUDE;
}
/**
 * Routes a capture either to {@code annotate} or to {@code remember}.
 *
 * @param o capture under test
 * @return the result of {@code annotate(o)} when the file equals {@code EMPTY_VALUE} or the
 *         MIME type equals {@code REVISIT_VALUE}; otherwise the result of {@code remember(o)}
 */
public int filterObject(CaptureSearchResult o) {
    // The MIME type is consulted only when the file check does not already decide.
    final boolean needsAnnotation =
            o.getFile().equals(EMPTY_VALUE) || o.getMimeType().equals(REVISIT_VALUE);
    return needsAnnotation ? annotate(o) : remember(o);
}
} // closes an enclosing scope that was opened outside this excerpt
/**
 * Routes a capture either to {@code annotate} or to {@code remember}.
 *
 * @param o capture under test
 * @return the result of {@code annotate(o)} when the file equals {@code EMPTY_VALUE} or the
 *         MIME type equals {@code REVISIT_VALUE}; otherwise the result of {@code remember(o)}
 */
public int filterObject(CaptureSearchResult o) {
    // The MIME type is consulted only when the file check does not already decide.
    final boolean needsAnnotation =
            o.getFile().equals(EMPTY_VALUE) || o.getMimeType().equals(REVISIT_VALUE);
    return needsAnnotation ? annotate(o) : remember(o);
}
} // closes an enclosing scope that was opened outside this excerpt
/**
 * Routes a capture based on its file and digest values.
 *
 * @param o capture under test
 * @return the result of {@code remember(o)} when the file differs from {@code EMPTY_VALUE};
 *         the result of {@code annotate(o)} when the file equals {@code EMPTY_VALUE} and the
 *         digest equals {@code EMPTY_SHA1}; {@code FILTER_INCLUDE} in the remaining case
 */
public int filterObject(CaptureSearchResult o) {
    if (!o.getFile().equals(EMPTY_VALUE)) {
        return remember(o);
    }
    // File is the empty marker: the digest decides between annotating and plain inclusion.
    if (!o.getDigest().equals(EMPTY_SHA1)) {
        return FILTER_INCLUDE;
    }
    return annotate(o);
}
/**
 * Routes a capture based on its file and digest values.
 *
 * @param o capture under test
 * @return the result of {@code remember(o)} when the file differs from {@code EMPTY_VALUE};
 *         the result of {@code annotate(o)} when the file equals {@code EMPTY_VALUE} and the
 *         digest equals {@code EMPTY_SHA1}; {@code FILTER_INCLUDE} in the remaining case
 */
public int filterObject(CaptureSearchResult o) {
    if (!o.getFile().equals(EMPTY_VALUE)) {
        return remember(o);
    }
    // File is the empty marker: the digest decides between annotating and plain inclusion.
    if (!o.getDigest().equals(EMPTY_SHA1)) {
        return FILTER_INCLUDE;
    }
    return annotate(o);
}
@Override public Resource retrieveResource(CaptureSearchResult result) throws ResourceNotAvailableException { try { PerfStats.timeStart(PerfStat.WArcResource); if (isSkipped(result.getFile())) { throw new ResourceNotAvailableException( "Revisit: Skipping already failed " + result.getFile()); } try { return resourceStore.retrieveResource(result); } catch (ResourceNotAvailableException ex) { // Old code obtained archive filename via getDtails() method of // exception object, in the code handling SepcificCaptureReplayException. // Of two subclasses of SpecificCaptureReplayException, BadContentException // (only thrown from HttpHeaderOperation.copyHTTPMessageHeader()) never had // non-null details. So, this covers all cases, and more robust. addSkip(result.getFile()); throw ex; } } finally { PerfStats.timeEnd(PerfStat.WArcResource); } }
@Override public boolean matches(Object actual) { // CaptureSearchResult is compared by file name and offset. this is // how AccessPoint#retrievePayloadForIdenticalContentRevisit(...) // retrieves previous capture. // TODO: this could be defined as CaptureSearchResult#equals(Object). if (!(actual instanceof CaptureSearchResult)) return false; String file = ((CaptureSearchResult)actual).getFile(); long offset = ((CaptureSearchResult)actual).getOffset(); if (expected.getOffset() != offset) return false; return file == null ? expected.getFile() == null : file .equals(expected.getFile()); }
/**
 * Appends a textual description of this matcher, in the form
 * {@code eqCaptureSearchResult(<file>,<offset>)}, using the values of {@code expected}.
 *
 * @param buffer destination the description is appended to
 */
@Override
public void appendTo(StringBuffer buffer) {
    buffer.append("eqCaptureSearchResult(")
            .append(expected.getFile())
            .append(",")
            .append(expected.getOffset())
            .append(")");
}
/**
 * Flags this capture as a revisit of the earlier capture {@code payload}, which was
 * identified by content digest.
 * <p>The record location of {@code payload} (capture timestamp, file, offset and, when
 * positive, compressed length) is copied onto this capture so that the content can be
 * loaded from that record later.</p>
 * <p>{@link ResourceIndex} implementations should call this method before returning
 * {@code CaptureSearchResult}s to {@code AccessPoint}.</p>
 * @param payload capture being revisited
 * @see #getDuplicateDigestStoredTimestamp()
 * @see #getDuplicateDigestStoredDate()
 * @see #getDuplicatePayloadFile()
 * @see #getDuplicatePayloadOffset()
 * @see #getDuplicatePayloadCompressedLength()
 */
public void flagDuplicateDigest(CaptureSearchResult payload) {
    flagDuplicateDigest();

    put(CAPTURE_DUPLICATE_STORED_TS, payload.getCaptureTimestamp());
    put(CAPTURE_DUPLICATE_PAYLOAD_FILE, payload.getFile());

    final String offsetText = Long.toString(payload.getOffset());
    put(CAPTURE_DUPLICATE_PAYLOAD_OFFSET, offsetText);

    // A non-positive compressed length is not recorded.
    if (payload.getCompressedLength() <= 0) {
        return;
    }
    put(CAPTURE_DUPLICATE_PAYLOAD_COMPRESSED_LENGTH,
            String.valueOf(payload.getCompressedLength()));
}
private int annotate(CaptureSearchResult o) { if(lastSeen == null) { // TODO: log missing record digest reference return FILTER_EXCLUDE; } o.setFile(lastSeen.getFile()); o.setOffset(lastSeen.getOffset()); o.setDigest(lastSeen.getDigest()); o.setHttpCode(lastSeen.getHttpCode()); o.setMimeType(lastSeen.getMimeType()); o.setRedirectUrl(lastSeen.getRedirectUrl()); o.flagDuplicateHTTP(lastSeen.getCaptureTimestamp()); return FILTER_INCLUDE; }
private int annotate(CaptureSearchResult o) { if(lastSeen == null) { // TODO: log missing record digest reference return FILTER_EXCLUDE; } o.setFile(lastSeen.getFile()); o.setOffset(lastSeen.getOffset()); o.setDigest(lastSeen.getDigest()); o.setHttpCode(lastSeen.getHttpCode()); o.setMimeType(lastSeen.getMimeType()); o.setRedirectUrl(lastSeen.getRedirectUrl()); o.flagDuplicateHTTP(lastSeen.getCaptureTimestamp()); return FILTER_INCLUDE; }