/**
 * Detects a character encoding from a byte-order mark at the current
 * read position of {@code resource}.
 * <p>
 * Up to {@code MAX_BOM_LEN} bytes are read and the read position is
 * then restored with {@code mark}/{@code reset}.
 *
 * @param resource Resource to inspect
 * @return {@code "UTF-16BE"}, {@code "UTF-16LE"} or {@code "UTF-8"} when
 *         the corresponding BOM is found; {@code null} when no BOM is
 *         found or an {@code IOException} occurs while reading/resetting.
 */
@Override
public String sniff(Resource resource) {
    byte[] bbuffer = new byte[MAX_BOM_LEN];
    resource.mark(MAX_BOM_LEN);
    try {
        try {
            // A single read() is allowed to return fewer bytes than
            // requested, so keep reading until the buffer is full or the
            // stream ends.
            int filled = 0;
            while (filled < MAX_BOM_LEN) {
                int n = resource.read(bbuffer, filled, MAX_BOM_LEN - filled);
                if (n <= 0) {
                    break;
                }
                filled += n;
            }
        } finally {
            // Rewind even when read() failed, so the read position is not
            // left advanced.
            resource.reset();
        }
    } catch (IOException ex) {
        return null;
    }
    // Bytes that were not read stay zero, which never matches a BOM byte,
    // so short payloads need no explicit length check here.
    if (bbuffer[0] == (byte)0xFE && bbuffer[1] == (byte)0xFF) {
        return "UTF-16BE";
    }
    if (bbuffer[0] == (byte)0xFF && bbuffer[1] == (byte)0xFE) {
        return "UTF-16LE";
    }
    if (bbuffer[0] == (byte)0xEF && bbuffer[1] == (byte)0xBB && bbuffer[2] == (byte)0xBF) {
        return "UTF-8";
    }
    return null;
}
}
/**
 * Closes {@code payloadResource}.
 * <p>
 * Note that {@code headersResource} is not closed by this method.
 *
 * @throws IOException if closing {@code payloadResource} fails
 */
@Override
public void close() throws IOException {
    // TODO: should call close on both?
    payloadResource.close();
}
/** * Read first {@code sniffLength} bytes of {@code resource}'s payload, * decoding {@code Content-Encoding} if any. Reset {@code resource}'s * read position back to zero. * @param resource Resource to load bytes from * @return bytes, zero-padded if payload is shorter. * @throws IOException */ protected byte[] peekContent(Resource resource) throws IOException { byte[] bbuffer = new byte[Math.max(sniffLength, MINIMUM_SNIFF_BUFFER_SIZE)]; String encoding = resource.getHeader("content-encoding"); if ("gzip".equalsIgnoreCase(encoding) || "x-gzip".equalsIgnoreCase(encoding)) { // use larger readlimit, because gzip-ed data can be larger than the original // at low compression level. resource.mark(sniffLength + 100); @SuppressWarnings("resource") Resource z = new GzipDecodingResource(resource); z.read(bbuffer, 0, sniffLength); resource.reset(); } else { resource.mark(sniffLength); resource.read(bbuffer, 0, sniffLength); resource.reset(); } return bbuffer; }
/**
 * Returns HTTP headers from {@code headersResource}, falling back to
 * {@code payloadResource} when {@code headersResource} reports a record
 * length of zero.
 *
 * @return HTTP headers of the selected resource
 */
@Override
public Map<String, String> getHttpHeaders() {
    // Revisit records had no HTTP headers in early days; in that case
    // the headers of the payload resource are used instead.
    final boolean headersMissing = headersResource.getRecordLength() == 0;
    return headersMissing
            ? payloadResource.getHttpHeaders()
            : headersResource.getHttpHeaders();
}
@Override
/**
 * Looks up a single header, preferring {@code headersResource} and
 * falling back to {@code payloadResource} when {@code headersResource}
 * reports a record length of zero.
 *
 * @param headerName name of the header to look up
 * @return value returned by the selected resource for {@code headerName}
 */
@Override
public String getHeader(String headerName) {
    if (headersResource.getRecordLength() != 0) {
        return headersResource.getHeader(headerName);
    }
    // Revisit records had no HTTP headers in early days; use the
    // payload resource's header in that case.
    return payloadResource.getHeader(headerName);
}
@Override
/**
 * Plain HTTP response record (no transfer-encoding / content-encoding):
 * status code, content type and payload length must be reported as-is.
 * @throws Exception on any unexpected failure
 */
public void testPlainHttpRecord() throws Exception {
    final String body = "hogehogehogehogehoge";
    Resource resource = createResource(TestWARCRecordInfo.createHttpResponse(body));
    resource.parseHeaders();

    assertEquals("statusCode", 200, resource.getStatusCode());
    assertEquals("content-type", "text/plain", resource.getHeader("Content-Type"));

    // Buffer is one byte larger than the payload, so a read that returns
    // exactly the payload length shows nothing extra was delivered.
    final int payloadLength = body.getBytes().length;
    byte[] readBuffer = new byte[payloadLength + 1];
    int bytesRead = resource.read(readBuffer);
    assertEquals("content length", payloadLength, bytesRead);

    resource.close();
}
/**
public void testUrlAgnosticRevisitRecord() throws Exception { final String ctype = "text/html"; WARCRecordInfo recinfo = TestWARCRecordInfo .createUrlAgnosticRevisitHttpResponse(ctype, 1345); Resource res = createResource(recinfo); res.parseHeaders(); // these are from this record. assertEquals("statusCode", 200, res.getStatusCode()); assertEquals("content-type", ctype, res.getHeader("Content-Type")); assertEquals("http://example.com/", res.getRefersToTargetURI()); assertEquals("20140101101010", res.getRefersToDate()); StandardCharsetDetector csd = new StandardCharsetDetector(); // assuming WaybackRequest (3rd parameter) is not used in getCharset() csd.getCharset(res, res, null); res.close(); }
/**
 * Reads bytes by delegating to {@code payloadResource}.
 *
 * @param b destination buffer
 * @param off offset in {@code b} at which to start storing bytes
 * @param len maximum number of bytes to read
 * @return whatever {@code payloadResource.read(b, off, len)} returns
 * @throws IOException if the underlying read fails
 */
@Override
public int read(byte[] b, int off, int len) throws IOException {
    return payloadResource.read(b, off, len);
}
@Override
status = origResource.getStatusCode(); if (origResource != null) { try { origResource.close(); } catch (IOException e) {
/**
 * Calls {@code parseHeaders()} on {@code headersResource} and then on
 * {@code payloadResource}.
 *
 * @throws IOException if either delegate fails to parse its headers
 */
@Override
public void parseHeaders() throws IOException {
    // currently this is not supposed to be used.
    // it is assumed parseHeaders() is already
    // called on each Resource.
    headersResource.parseHeaders();
    payloadResource.parseHeaders();
}
@Override
/**
 * Returns the HTTP headers of the wrapped {@code source}.
 *
 * @return whatever {@code source.getHttpHeaders()} returns
 */
@Override
public Map<String, String> getHttpHeaders() {
    return source.getHttpHeaders();
}
resource, result, uriConverter, httpHeaderProcessor); String origLength = HttpHeaderOperation.getContentLength(resource.getHttpHeaders()); if (origLength != null) { HttpHeaderOperation.replaceHeader(headers, HttpHeaderOperation.HTTP_LENGTH_HEADER, origLength); byte[] buffer = new byte[BUFFER_SIZE]; long total = 0; for (int r = -1; (r = resource.read(buffer, 0, BUFFER_SIZE)) != -1;) { os.write(buffer, 0, r); total += r;
/**
 * Returns the status code reported by {@code headersResource}.
 *
 * @return whatever {@code headersResource.getStatusCode()} returns
 */
@Override
public int getStatusCode() {
    return headersResource.getStatusCode();
}
protected boolean isSelfRedirect(Resource resource, CaptureSearchResult closest, WaybackRequest wbRequest, String canonRequestURL) { int status = resource.getStatusCode(); String location = resource.getHeader("Location");
throw new IOException(e); httpResponse.setStatus(res.getStatusCode()); Map<String,String> headers = res.getHttpHeaders(); Iterator<String> keys = headers.keySet().iterator(); while(keys.hasNext()) {
String payloadUri = revisitRecord.getRefersToTargetURI(); String payloadTimestamp = revisitRecord.getRefersToDate();
/**
 * Runs {@code detector} on a single file and prints one tab-separated
 * line to standard output: file path, declared content type (parameters
 * after {@code ';'} stripped, {@code "-"} if absent) and sniffed MIME
 * type ({@code "-"} if the detector returns {@code null}).
 * <p>
 * Any exception is reported on the same output as
 * {@code path \t - \t ERROR message} instead of being propagated.
 * The file stream opened here is always closed before returning.
 *
 * @param detector detector to run
 * @param file file to read a resource from
 */
public static void detectFile(MimeTypeDetector detector, File file) {
    InputStream is = null;
    try {
        is = new FileInputStream(file);
        Resource resource = JWATResource.getResource(is, 0);
        String contentType = resource.getHeader("content-type");
        if (contentType == null) {
            contentType = "-";
        } else {
            // Drop parameters such as "; charset=..." from the header value.
            int p = contentType.indexOf(';');
            if (p >= 0) {
                contentType = contentType.substring(0, p).trim();
            }
        }
        String mimeType = detector.sniff(resource);
        if (mimeType == null) {
            mimeType = "-";
        }
        System.out.println(file.getPath() + "\t" + contentType + "\t" + mimeType);
    } catch (Exception ex) {
        System.out.println(file.getPath() + "\t" + "-" + "\tERROR " + ex.getMessage());
    } finally {
        // Release the file descriptor; previously the stream was never
        // closed, leaking one descriptor per processed file.
        if (is != null) {
            try {
                is.close();
            } catch (java.io.IOException ignored) {
                // Best effort: the result line has already been printed.
            }
        }
    }
}
}
if (httpHeadersResource.getRecordLength() <= 0) { httpHeadersResource.close(); httpHeadersResource = payloadResource;
if (res.getRefersToDate() != null) { String refTimestamp = res.getRefersToDate(); for (CaptureSearchResult r : results.getResults()) { if (r.getCaptureTimestamp().equals(refTimestamp)) { res.getClass()); result.setHttpCode(Integer.toString(res.getStatusCode()));