/**
 * Reports whether this Resource is a {@code server-not-modified} revisit,
 * judged by comparing the {@code WARC-Profile} header field against
 * {@code PROFILE_REVISIT_SERVER_NOT_MODIFIED}.
 * <p>
 * Historical note: this logic previously lived in
 * {@code AccessPoint#isWarcRevisitNotModified(Resource)}. It was not added to
 * the {@code Resource} interface because it was unused there.
 *
 * @return {@code true} if the {@code WARC-Profile} field equals the
 *         server-not-modified profile constant; {@code false} otherwise,
 *         including when the field is absent
 */
public boolean isRevisitNotModified() {
    // The cast is kept deliberately: a non-String value fails here exactly as before.
    final String profile =
            (String) getWarcHeaders().getHeaderFields().get("WARC-Profile");
    // Constant-first equals, so a missing (null) profile simply yields false.
    return PROFILE_REVISIT_SERVER_NOT_MODIFIED.equals(profile);
}
}
/**
 * Reports whether this Resource is a {@code server-not-modified} revisit,
 * judged by comparing the {@code WARC-Profile} header field against
 * {@code PROFILE_REVISIT_SERVER_NOT_MODIFIED}.
 * <p>
 * Historical note: this logic previously lived in
 * {@code AccessPoint#isWarcRevisitNotModified(Resource)}. It was not added to
 * the {@code Resource} interface because it was unused there.
 *
 * @return {@code true} if the {@code WARC-Profile} field equals the
 *         server-not-modified profile constant; {@code false} otherwise,
 *         including when the field is absent
 */
public boolean isRevisitNotModified() {
    // The cast is kept deliberately: a non-String value fails here exactly as before.
    final String profile =
            (String) getWarcHeaders().getHeaderFields().get("WARC-Profile");
    // Constant-first equals, so a missing (null) profile simply yields false.
    return PROFILE_REVISIT_SERVER_NOT_MODIFIED.equals(profile);
}
}
/**
 * Looks up the {@code WARC-Refers-To-Target-URI} field among this record's
 * WARC header fields.
 *
 * @return the stored value cast to {@code String}; {@code null} if the lookup
 *         yields no value
 */
public String getRefersToTargetURI() {
    final Object targetUri =
            getWarcHeaders().getHeaderFields().get("WARC-Refers-To-Target-URI");
    return (String) targetUri;
}
/**
 * Looks up the {@code WARC-Refers-To-Target-URI} field among this record's
 * WARC header fields.
 *
 * @return the stored value cast to {@code String}; {@code null} if the lookup
 *         yields no value
 */
public String getRefersToTargetURI() {
    final Object targetUri =
            getWarcHeaders().getHeaderFields().get("WARC-Refers-To-Target-URI");
    return (String) targetUri;
}
/**
 * Serialises a WARC record to a byte array: a {@code WARC/0.17} version line,
 * one {@code key: value} line per header field, a blank line, and then
 * whatever {@code record.dump(...)} writes.
 * <p>
 * All header text is encoded as UTF-8. The previous implementation used
 * {@code String.getBytes()} with no argument, which encodes with the JVM's
 * platform default charset and therefore produced different bytes for
 * non-ASCII header values depending on the host configuration.
 *
 * @param record the record to serialise; its header fields are iterated and
 *               its remaining content is consumed via {@code dump}
 * @return the serialised bytes
 * @throws IOException if writing to the buffer or dumping the record fails
 */
public static byte[] toBytes(WARCRecord record) throws IOException {
    // Fully-qualified so that this method needs no additional import.
    final java.nio.charset.Charset utf8 = java.nio.charset.StandardCharsets.UTF_8;
    ByteArrayOutputStream baos = new ByteArrayOutputStream();

    // Version line. Line endings are intentionally left exactly as before ("\n").
    baos.write("WARC/0.17\n".getBytes(utf8));

    for (Map.Entry<String, Object> entry : record.getHeader().getHeaderFields().entrySet()) {
        String headerLine = entry.getKey() + ": " + entry.getValue().toString() + "\n";
        baos.write(headerLine.getBytes(utf8));
    }

    // Blank line separating the header block from what dump() writes.
    baos.write("\n".getBytes(utf8));
    record.dump(baos);
    return baos.toByteArray();
}
/**
 * Reads the {@code WARC-Refers-To-Date} header field and re-formats it through
 * {@code ArchiveUtils}: the raw value is parsed with
 * {@code parse14DigitISODate} and the resulting date is rendered with
 * {@code get14DigitDate}.
 *
 * @return the re-formatted date string, or {@code null} when the field is
 *         missing or the parse call returns {@code null}
 */
public String getRefersToDate() {
    final String rawDate =
            (String) getWarcHeaders().getHeaderFields().get("WARC-Refers-To-Date");
    if (rawDate == null) {
        return null;
    }
    // Second argument is passed as null, as in the original call;
    // presumably a fallback value for unparseable input (verify against ArchiveUtils).
    final Date parsed = ArchiveUtils.parse14DigitISODate(rawDate, null);
    if (parsed == null) {
        return null;
    }
    return ArchiveUtils.get14DigitDate(parsed);
}
/**
 * Reads the {@code WARC-Refers-To-Date} header field and re-formats it through
 * {@code ArchiveUtils}: the raw value is parsed with
 * {@code parse14DigitISODate} and the resulting date is rendered with
 * {@code get14DigitDate}.
 *
 * @return the re-formatted date string, or {@code null} when the field is
 *         missing or the parse call returns {@code null}
 */
public String getRefersToDate() {
    final String rawDate =
            (String) getWarcHeaders().getHeaderFields().get("WARC-Refers-To-Date");
    if (rawDate == null) {
        return null;
    }
    // Second argument is passed as null, as in the original call;
    // presumably a fallback value for unparseable input (verify against ArchiveUtils).
    final Date parsed = ArchiveUtils.parse14DigitISODate(rawDate, null);
    if (parsed == null) {
        return null;
    }
    return ArchiveUtils.get14DigitDate(parsed);
}
if( !header.getHeaderFields().isEmpty() ) { newKey = (header.getDate().replaceAll("[^0-9]", "")) + "/" + header.getUrl();
/**
 * Reads up to {@code length} bytes of this record into {@code b}, never asking
 * the underlying stream for more than {@code available()} reports.
 * <p>
 * Bytes actually read are fed to the digest (when one is set) and added to the
 * record position. If the underlying stream reports end-of-stream even though
 * {@code available()} said more bytes were due, this is treated as a premature
 * EOF: in strict mode an {@link IOException} is thrown; otherwise the record
 * is flagged as ended, a warning is printed to {@code System.err}, and
 * {@code -1} is returned.
 * <p>
 * Fix relative to the previous version: on a non-strict premature EOF the old
 * code fell through to {@code incrementPosition(-1)}, moving the position
 * backwards by one. The position is now left untouched in that case.
 * <p>
 * Note: a request that resolves to zero bytes (either {@code length == 0} or
 * nothing available) returns {@code -1}, not {@code 0}; that existing
 * behaviour is preserved.
 *
 * @param b      destination buffer
 * @param offset index in {@code b} at which to start storing bytes
 * @param length maximum number of bytes to read
 * @return the number of bytes read, or {@code -1} when nothing (more) can be read
 * @throws IOException if the underlying read fails, or on premature EOF in strict mode
 */
public int read(byte[] b, int offset, int length) throws IOException {
    int read = Math.min(length, available());
    if (read == -1 || read == 0) {
        return -1;
    }

    read = this.in.read(b, offset, read);
    if (read == -1) {
        String msg = "Premature EOF before end-of-record: "
                + getHeader().getHeaderFields();
        if (isStrict()) {
            throw new IOException(msg);
        }
        setEor(true);
        System.err.println(Level.WARNING.toString() + " " + msg);
        // Return here: no bytes were consumed, so neither digest nor position may change.
        return -1;
    }

    if (this.digest != null) {
        this.digest.update(b, offset, read);
    }
    incrementPosition(read);
    return read;
}
/**
 * Reads up to {@code length} bytes of this record into {@code b}, never asking
 * the underlying stream for more than {@code available()} reports.
 * <p>
 * Bytes actually read are fed to the digest (when one is set) and added to the
 * record position. If the underlying stream reports end-of-stream even though
 * {@code available()} said more bytes were due, this is treated as a premature
 * EOF: in strict mode an {@link IOException} is thrown; otherwise the record
 * is flagged as ended, a warning is printed to {@code System.err}, and
 * {@code -1} is returned.
 * <p>
 * Fix relative to the previous version: on a non-strict premature EOF the old
 * code fell through to {@code incrementPosition(-1)}, moving the position
 * backwards by one. The position is now left untouched in that case.
 * <p>
 * Note: a request that resolves to zero bytes (either {@code length == 0} or
 * nothing available) returns {@code -1}, not {@code 0}; that existing
 * behaviour is preserved.
 *
 * @param b      destination buffer
 * @param offset index in {@code b} at which to start storing bytes
 * @param length maximum number of bytes to read
 * @return the number of bytes read, or {@code -1} when nothing (more) can be read
 * @throws IOException if the underlying read fails, or on premature EOF in strict mode
 */
public int read(byte[] b, int offset, int length) throws IOException {
    int read = Math.min(length, available());
    if (read == -1 || read == 0) {
        return -1;
    }

    read = this.in.read(b, offset, read);
    if (read == -1) {
        String msg = "Premature EOF before end-of-record: "
                + getHeader().getHeaderFields();
        if (isStrict()) {
            throw new IOException(msg);
        }
        setEor(true);
        System.err.println(Level.WARNING.toString() + " " + msg);
        // Return here: no bytes were consumed, so neither digest nor position may change.
        return -1;
    }

    if (this.digest != null) {
        this.digest.update(b, offset, read);
    }
    incrementPosition(read);
    return read;
}
Map<String, Object> headerFields = warc.getHeader().getHeaderFields();
/**
 * Reads up to {@code length} bytes of this record into {@code b}, never asking
 * the underlying stream for more than {@code available()} reports.
 * <p>
 * Bytes actually read are fed to the digest (when one is set) and added to the
 * record position. If the underlying stream reports end-of-stream even though
 * {@code available()} said more bytes were due, this is treated as a premature
 * EOF: in strict mode an {@link IOException} is thrown; otherwise the record
 * is flagged as ended, a warning is printed to {@code System.err}, and
 * {@code -1} is returned.
 * <p>
 * Fix relative to the previous version: on a non-strict premature EOF the old
 * code fell through to {@code incrementPosition(-1)}, moving the position
 * backwards by one. The position is now left untouched in that case.
 * <p>
 * Note: a request that resolves to zero bytes (either {@code length == 0} or
 * nothing available) returns {@code -1}, not {@code 0}; that existing
 * behaviour is preserved.
 *
 * @param b      destination buffer
 * @param offset index in {@code b} at which to start storing bytes
 * @param length maximum number of bytes to read
 * @return the number of bytes read, or {@code -1} when nothing (more) can be read
 * @throws IOException if the underlying read fails, or on premature EOF in strict mode
 */
public int read(byte[] b, int offset, int length) throws IOException {
    int read = Math.min(length, available());
    if (read == -1 || read == 0) {
        return -1;
    }

    read = this.in.read(b, offset, read);
    if (read == -1) {
        String msg = "Premature EOF before end-of-record: "
                + getHeader().getHeaderFields();
        if (isStrict()) {
            throw new IOException(msg);
        }
        setEor(true);
        System.err.println(Level.WARNING.toString() + " " + msg);
        // Return here: no bytes were consumed, so neither digest nor position may change.
        return -1;
    }

    if (this.digest != null) {
        this.digest.update(b, offset, read);
    }
    incrementPosition(read);
    return read;
}
if( !header.getHeaderFields().isEmpty() ) { newKey = (header.getDate().replaceAll("[^0-9]", "")) + "/" + header.getUrl();
" when only " + LINE_SEPARATOR + " expected. " + getReaderIdentifier() + ((h != null)? h.getHeaderFields().toString(): ""));
" when only " + LINE_SEPARATOR + " expected. " + getReaderIdentifier() + ((h != null)? h.getHeaderFields().toString(): ""));
" when only " + LINE_SEPARATOR + " expected. " + getReaderIdentifier() + ((h != null)? h.getHeaderFields().toString(): ""));
"+"+record.available()+","+header.getLength()+ ": "+header.getUrl()); for( String h : header.getHeaderFields().keySet()) { log.debug("ArchiveHeader: "+h+" -> "+header.getHeaderValue(h));
"+"+record.available()+","+header.getLength()+ ": "+header.getUrl()); for( String h : header.getHeaderFields().keySet()) { log.debug("ArchiveHeader: "+h+" -> "+header.getHeaderValue(h));
if (!header.getHeaderFields().isEmpty()) { solr = indexer.extract(key.toString(), value.getRecord());
final String url = Normalisation.sanitiseWARCHeaderValue(header.getUrl()); try { if (!header.getHeaderFields().isEmpty()) {