Tabnine Logo
RecordingInputStream.getSize
Code Index | Add Tabnine to your IDE (free)

How to use the getSize method in org.archive.io.RecordingInputStream

Best Java code snippets using org.archive.io.RecordingInputStream.getSize (Showing top 20 results out of 315)

origin: internetarchive/heritrix3

/**
 * Decides whether the recorded content of the given URI should be extracted.
 * Content larger than the limit returned by {@code getMaxSizeToParse()} is
 * rejected; otherwise the content type must start with "application/pdf".
 *
 * @param uri the CrawlURI whose recorded input and content type are checked
 * @return true only if the recorded size is within the limit and the
 *         content type is non-null and begins with "application/pdf"
 */
@Override
protected boolean shouldExtract(CrawlURI uri) {
  final long sizeLimit = getMaxSizeToParse();
  final long recordedBytes = uri.getRecorder().getRecordedInput().getSize();
  if (recordedBytes > sizeLimit) {
    // too large to parse
    return false;
  }
  final String contentType = uri.getContentType();
  if (contentType == null) {
    return false;
  }
  return contentType.startsWith("application/pdf");
}

origin: internetarchive/heritrix3

/**
 * Update the CrawlURI's size bookkeeping from the current transaction and,
 * for 304 responses, from the first entry of its fetch history.
 *
 * @param curi CrawlURI to update
 * @param rec Recorder whose recorded input supplies the transferred size
 */
protected void setSizes(CrawlURI curi, Recorder rec) {
  // reporting size
  curi.setContentSize(rec.getRecordedInput().getSize());
  // also expose it as extra info so it can be written to the crawl log
  curi.addExtraInfo("contentSize", rec.getRecordedInput().getSize());

  // everything below applies only to 304-not-modified with a fetch history
  if (curi.getFetchStatus() != HttpStatus.SC_NOT_MODIFIED
      || curi.getFetchHistory() == null) {
    return;
  }
  Map<String, Object>[] fetchHistory = curi.getFetchHistory();
  if (fetchHistory[0] == null
      || !fetchHistory[0].containsKey(A_REFERENCE_LENGTH)) {
    return;
  }
  long priorReferenceLength = (Long) fetchHistory[0].get(A_REFERENCE_LENGTH);
  // carry the previous 'reference-length' forward for future fetches
  curi.getData().put(A_REFERENCE_LENGTH, priorReferenceLength);
  // report the virtual size: bytes recorded now plus the referenced length
  curi.setContentSize(rec.getRecordedInput().getSize()
      + priorReferenceLength);
}
origin: internetarchive/heritrix3

/**
 * Returns the recorded size for the given CrawlURI.
 *
 * @param puri CrawlURI to inspect
 * @return the size of the recorder's recorded input when a recorder is
 *         present, otherwise the URI's content size
 */
public static long getRecordedSize(CrawlURI puri) {
  return (puri.getRecorder() != null)
      ? puri.getRecorder().getRecordedInput().getSize()
      : puri.getContentSize();
}

origin: internetarchive/heritrix3

/**
 * Get the size of the data recorded (transferred).
 *
 * @return size of the recorder's recorded input, or the content size
 *         when no recorder is available
 */
public long getRecordedSize() {
  if (getRecorder() == null) {
    // no recorder: fall back on content-size
    return getContentSize();
  }
  return getRecorder().getRecordedInput().getSize();
}
origin: internetarchive/heritrix3

/**
 * Works out how many recorded bytes to keep for a revisit record and
 * delegates to the {@code writeRevisit} overload that takes that length.
 * The length is 0 unless the revisit profile is the identical-digest
 * profile; in that case it is the recorded input's content-begin value,
 * or the full recorded size when that value is not positive.
 *
 * @param w writer passed through to the delegate
 * @param timestamp passed through to the delegate
 * @param mimetype passed through to the delegate
 * @param baseid passed through to the delegate
 * @param curi CrawlURI supplying the revisit profile and recorder
 * @param headers passed through to the delegate
 * @return whatever the delegated {@code writeRevisit} call returns
 * @throws IOException declared by this method; NOTE(review): presumably
 *         raised by the delegate while writing - confirm
 */
protected URI writeRevisit(final WARCWriter w,
    final String timestamp, final String mimetype,
    final URI baseid, final CrawlURI curi,
    final ANVLRecord headers)
        throws IOException {
  // by default, truncate all data
  long lengthToKeep = 0;

  final boolean identicalDigest = curi.getRevisitProfile().getProfileName()
      .equals(PROFILE_REVISIT_IDENTICAL_DIGEST);
  if (identicalDigest) {
    // save response from identical digest matches
    lengthToKeep = curi.getRecorder().getRecordedInput().getContentBegin();
    if (lengthToKeep <= 0) {
      lengthToKeep = curi.getRecorder().getRecordedInput().getSize();
    }
  }
  return writeRevisit(w, timestamp, mimetype, baseid, curi,
      headers, lengthToKeep);
}

origin: internetarchive/heritrix3

} finally {
  recorder.close();
  curi.setContentSize(recorder.getRecordedInput().getSize());
  logger.fine(curi + ": " + recorder.getRecordedInput().getSize() + " bytes read");
origin: internetarchive/heritrix3

recordInfo.setRecordId(baseid);
recordInfo.setExtraHeaders(namedFields);
recordInfo.setContentLength(curi.getRecorder().getRecordedInput().getSize());
recordInfo.setEnforceLength(true);
origin: internetarchive/heritrix3

/**
 * Builds a response-type WARC record from the CrawlURI's recorded input
 * and writes it with the given writer. When the URI's data map holds a
 * value under {@code A_WHOIS_SERVER_IP}, it is added as an extra header
 * keyed by {@code HEADER_KEY_IP}.
 *
 * @param w writer that receives the record
 * @param curi CrawlURI supplying URL, content type, data map and recorder
 * @param baseid record id assigned to the record
 * @param timestamp value handed to {@code setCreate14DigitDate}
 * @throws IOException declared by this method; NOTE(review): presumably
 *         raised while obtaining the replay stream or writing - confirm
 */
protected void writeWhoisRecords(WARCWriter w, CrawlURI curi, URI baseid,
    String timestamp) throws IOException {
  final WARCRecordInfo info = new WARCRecordInfo();
  info.setType(WARCRecordType.response);
  info.setUrl(curi.toString());
  info.setCreate14DigitDate(timestamp);
  info.setMimetype(curi.getContentType());
  info.setRecordId(baseid);
  info.setContentLength(curi.getRecorder().getRecordedInput().getSize());
  info.setEnforceLength(true);

  final Object serverIp = curi.getData().get(CoreAttributeConstants.A_WHOIS_SERVER_IP);
  if (serverIp != null) {
    info.addExtraHeader(HEADER_KEY_IP, serverIp.toString());
  }

  final ReplayInputStream replay =
      curi.getRecorder().getRecordedInput().getReplayInputStream();
  info.setContentStream(replay);

  try {
    w.writeRecord(info);
  } finally {
    // always release the replay stream, even if the write fails
    IOUtils.closeQuietly(replay);
  }
  // return value is discarded; the call is retained unchanged
  info.getRecordId();
}
origin: internetarchive/heritrix3

/**
 * Builds a response-type WARC record from the CrawlURI's recorded input
 * and writes it with the given writer. When the URI's data map holds a
 * non-empty string under {@code A_DNS_SERVER_IP_LABEL}, it is added as an
 * extra header keyed by {@code HEADER_KEY_IP}.
 *
 * @param curi CrawlURI supplying URL, content type, data map and recorder
 * @param w writer that receives the record
 * @param baseid record id assigned to the record
 * @param timestamp value handed to {@code setCreate14DigitDate}
 * @throws IOException declared by this method; NOTE(review): presumably
 *         raised while obtaining the replay stream or writing - confirm
 */
protected void writeDnsRecords(final CrawlURI curi, WARCWriter w,
    final URI baseid, final String timestamp) throws IOException {
  final WARCRecordInfo info = new WARCRecordInfo();
  info.setType(WARCRecordType.response);
  info.setUrl(curi.toString());
  info.setCreate14DigitDate(timestamp);
  info.setMimetype(curi.getContentType());
  info.setRecordId(baseid);

  info.setContentLength(curi.getRecorder().getRecordedInput().getSize());
  info.setEnforceLength(true);

  final String serverIp = (String) curi.getData().get(A_DNS_SERVER_IP_LABEL);
  if (serverIp != null && !serverIp.isEmpty()) {
    info.addExtraHeader(HEADER_KEY_IP, serverIp);
  }

  final ReplayInputStream replay =
      curi.getRecorder().getRecordedInput().getReplayInputStream();
  info.setContentStream(replay);

  try {
    w.writeRecord(info);
  } finally {
    // always release the replay stream, even if the write fails
    IOUtils.closeQuietly(replay);
  }

  // return value is discarded; the call is retained unchanged
  info.getRecordId();
}
origin: internetarchive/heritrix3

+ " " + curi.getUURI().toString() + " "
+ response.getStatusLine().getStatusCode() + " "
+ rec.getRecordedInput().getSize() + " "
+ curi.getContentType());
origin: internetarchive/heritrix3

/**
 * Builds a resource-type WARC record from the CrawlURI's recorded input,
 * writes it with the given writer, and returns the record's id.
 *
 * @param w writer that receives the record
 * @param timestamp value handed to {@code setCreate14DigitDate}
 * @param mimetype mimetype set on the record
 * @param baseid record id assigned to the record
 * @param curi CrawlURI supplying the URL and the recorder
 * @param namedFields extra headers attached to the record
 * @return the id reported by the record info after writing
 * @throws IOException declared by this method; NOTE(review): presumably
 *         raised while obtaining the replay stream or writing - confirm
 */
protected URI writeResource(final WARCWriter w,
    final String timestamp, final String mimetype,
    final URI baseid, final CrawlURI curi,
    final ANVLRecord namedFields)
throws IOException {
  final WARCRecordInfo info = new WARCRecordInfo();
  info.setType(WARCRecordType.resource);
  info.setUrl(curi.toString());
  info.setCreate14DigitDate(timestamp);
  info.setMimetype(mimetype);
  info.setRecordId(baseid);
  info.setExtraHeaders(namedFields);
  info.setContentLength(curi.getRecorder().getRecordedInput().getSize());
  info.setEnforceLength(true);

  final ReplayInputStream replay =
      curi.getRecorder().getRecordedInput().getReplayInputStream();
  info.setContentStream(replay);
  try {
    w.writeRecord(info);
  } finally {
    // always release the replay stream, even if the write fails
    IOUtils.closeQuietly(replay);
  }

  return info.getRecordId();
}
origin: internetarchive/heritrix3

recorder.close();
curi.setContentSize(recorder.getRecordedInput().getSize());
  logger.fine("read " + recorder.getRecordedInput().getSize()
      + " bytes from ftp data socket");
origin: iipc/webarchive-commons

/**
 * Calculate a recommended size for an in-memory decoded-character buffer
 * of this content. The result is the smaller of half the stream's
 * recorded buffer length (chars being 2 bytes each, so the char buffer
 * uses no more memory than the raw byte buffer) and the number of bytes
 * actually recorded (so it is no larger than likely necessary).
 *
 * @param inStream recording stream whose buffer length and size are read
 * @return int length for in-memory decoded-character buffer
 */
static protected int calcRecommendedCharBufferSize(RecordingInputStream inStream) {
  final long halfBufferLength = inStream.getRecordedBufferLength() / 2;
  final long recordedBytes = inStream.getSize();
  return (int) Math.min(halfBufferLength, recordedBytes);
}

origin: org.netpreserve.commons/webarchive-commons

/**
 * Recommend a length for an in-memory buffer of decoded characters.
 * Two bounds apply: half of the stream's recorded buffer length (a char
 * takes 2 bytes, so this keeps the char buffer within the memory of the
 * raw byte buffer) and the count of bytes recorded; the lower one wins.
 *
 * @param inStream recording stream supplying buffer length and size
 * @return int length for in-memory decoded-character buffer
 */
static protected int calcRecommendedCharBufferSize(RecordingInputStream inStream) {
  final long memoryBound = inStream.getRecordedBufferLength() / 2;
  final long contentBound = inStream.getSize();
  final long recommended = Math.min(memoryBound, contentBound);
  return (int) recommended;
}

origin: org.netpreserve.commons/commons-web

/**
 * Compute the suggested size of an in-memory decoded-character buffer
 * for this content: the minimum of half the recorded buffer length
 * (2-byte chars, so no more memory than the raw byte buffer) and the
 * actual recorded byte size.
 *
 * @param inStream recording stream to size the buffer for
 * @return int length for in-memory decoded-character buffer
 */
static protected int calcRecommendedCharBufferSize(RecordingInputStream inStream) {
  final long charsFittingRawBuffer = inStream.getRecordedBufferLength() / 2;
  final long bytesRecorded = inStream.getSize();
  return (int) Math.min(charsFittingRawBuffer, bytesRecorded);
}

origin: org.archive.heritrix/heritrix-modules

/**
 * Tells whether the URI's recorded content qualifies for extraction:
 * its recorded size must not exceed {@code getMaxSizeToParse()} and its
 * content type must begin with "application/pdf".
 *
 * @param uri the CrawlURI under consideration
 * @return false when the content is over the size limit or the content
 *         type is null or not PDF; true otherwise
 */
@Override
protected boolean shouldExtract(CrawlURI uri) {
  final long maxParseSize = getMaxSizeToParse();
  final boolean oversized =
      uri.getRecorder().getRecordedInput().getSize() > maxParseSize;
  if (oversized) {
    return false;
  }
  final String mime = uri.getContentType();
  if (mime == null) {
    return false;
  }
  return mime.startsWith("application/pdf");
}

origin: org.archive.heritrix/heritrix-modules

/**
 * Get the size of the data recorded (transferred).
 *
 * @return the recorded input's size when a recorder exists; otherwise
 *         the content size is used as a fallback
 */
public long getRecordedSize() {
  if (getRecorder() != null) {
    return getRecorder().getRecordedInput().getSize();
  }
  // recorder unavailable: fall back on content-size
  return getContentSize();
}
origin: org.archive.heritrix/heritrix-modules

/**
 * Returns the recorded size for the given CrawlURI, preferring the size
 * of the recorder's recorded input and falling back to the URI's content
 * size when it has no recorder.
 *
 * @param puri CrawlURI to inspect
 * @return recorded input size, or content size if the recorder is null
 */
public static long getRecordedSize(CrawlURI puri) {
  if (puri.getRecorder() != null) {
    return puri.getRecorder().getRecordedInput().getSize();
  }
  return puri.getContentSize();
}

origin: org.archive.heritrix/heritrix-modules

/**
 * Chooses the number of recorded bytes to retain in a revisit record,
 * then hands off to the {@code writeRevisit} overload accepting that
 * length. Nothing is retained (length 0) unless the revisit profile name
 * equals {@code PROFILE_REVISIT_IDENTICAL_DIGEST}; then the recorded
 * input's content-begin value is used, or the whole recorded size if
 * that value is not greater than zero.
 *
 * @param w writer passed through to the delegate
 * @param timestamp passed through to the delegate
 * @param mimetype passed through to the delegate
 * @param baseid passed through to the delegate
 * @param curi CrawlURI supplying the revisit profile and recorder
 * @param headers passed through to the delegate
 * @return the result of the delegated {@code writeRevisit} call
 * @throws IOException declared by this method; NOTE(review): presumably
 *         raised by the delegate while writing - confirm
 */
protected URI writeRevisit(final WARCWriter w,
    final String timestamp, final String mimetype,
    final URI baseid, final CrawlURI curi,
    final ANVLRecord headers)
        throws IOException {
  // by default, truncate all data
  long retained = 0;

  final String profileName = curi.getRevisitProfile().getProfileName();
  if (profileName.equals(PROFILE_REVISIT_IDENTICAL_DIGEST)) {
    // save response from identical digest matches
    retained = curi.getRecorder().getRecordedInput().getContentBegin();
    if (!(retained > 0)) {
      retained = curi.getRecorder().getRecordedInput().getSize();
    }
  }
  return writeRevisit(w, timestamp, mimetype, baseid, curi,
      headers, retained);
}

origin: org.archive.heritrix/heritrix-modules

/**
 * Assembles a response-type WARC record for the CrawlURI from its
 * recorded input and writes it via the supplied writer. If the URI's
 * data map contains a value for {@code A_WHOIS_SERVER_IP}, its string
 * form is attached as an extra header under {@code HEADER_KEY_IP}.
 *
 * @param w writer that receives the record
 * @param curi CrawlURI supplying URL, content type, data map and recorder
 * @param baseid record id assigned to the record
 * @param timestamp value handed to {@code setCreate14DigitDate}
 * @throws IOException declared by this method; NOTE(review): presumably
 *         raised while obtaining the replay stream or writing - confirm
 */
protected void writeWhoisRecords(WARCWriter w, CrawlURI curi, URI baseid,
    String timestamp) throws IOException {
  final WARCRecordInfo whoisRecord = new WARCRecordInfo();
  whoisRecord.setType(WARCRecordType.response);
  whoisRecord.setUrl(curi.toString());
  whoisRecord.setCreate14DigitDate(timestamp);
  whoisRecord.setMimetype(curi.getContentType());
  whoisRecord.setRecordId(baseid);
  whoisRecord.setContentLength(curi.getRecorder().getRecordedInput().getSize());
  whoisRecord.setEnforceLength(true);

  final Object ipAttribute = curi.getData().get(CoreAttributeConstants.A_WHOIS_SERVER_IP);
  if (ipAttribute != null) {
    whoisRecord.addExtraHeader(HEADER_KEY_IP, ipAttribute.toString());
  }

  final ReplayInputStream recordedContent =
      curi.getRecorder().getRecordedInput().getReplayInputStream();
  whoisRecord.setContentStream(recordedContent);

  try {
    w.writeRecord(whoisRecord);
  } finally {
    // always release the replay stream, even if the write fails
    IOUtils.closeQuietly(recordedContent);
  }
  // return value is discarded; the call is retained unchanged
  whoisRecord.getRecordId();
}
org.archive.io.RecordingInputStream.getSize

Popular methods of RecordingInputStream

  • getReplayInputStream
  • close
  • getMessageBodyReplayInputStream
  • getResponseContentLength
  • isOpen
  • <init>
    Create a new RecordingInputStream.
  • clearForReuse
  • closeRecorder
  • getRecordedBufferLength
    Expose the amount of in-memory buffering used by the internal recording stream.
  • markContentBegin
  • open
  • read
  • open,
  • read,
  • chopAtMessageBodyBegin,
  • getContentBegin,
  • getDigestValue,
  • readFullyOrUntil,
  • readToEndOfContent,
  • setDigest,
  • setLimits

Popular in Java

  • Parsing JSON documents to java classes using gson
  • runOnUiThread (Activity)
  • getSystemService (Context)
  • getExternalFilesDir (Context)
  • NumberFormat (java.text)
    The abstract base class for all number formats. This class provides the interface for formatting and
  • AtomicInteger (java.util.concurrent.atomic)
    An int value that may be updated atomically. See the java.util.concurrent.atomic package specificati
  • JTable (javax.swing)
  • BasicDataSource (org.apache.commons.dbcp)
    Basic implementation of javax.sql.DataSource that is configured via JavaBeans properties. This is no
  • Reflections (org.reflections)
    Reflections one-stop-shop objectReflections scans your classpath, indexes the metadata, allows you t
  • SAXParseException (org.xml.sax)
    Encapsulate an XML parse error or warning.> This module, both source code and documentation, is in t
  • Top plugins for WebStorm
Tabnine Logo
  • Products

    Search for Java code | Search for JavaScript code
  • IDE Plugins

    IntelliJ IDEA, WebStorm, Visual Studio, Android Studio, Eclipse, Visual Studio Code, PyCharm, Sublime Text, PhpStorm, Vim, GoLand, RubyMine, Emacs, Jupyter Notebook, Jupyter Lab, Rider, DataGrip, AppCode
  • Company

    About Us | Contact Us | Careers
  • Resources

    FAQ | Blog | Tabnine Academy | Terms of use | Privacy policy | Java Code Index | JavaScript Code Index
Get Tabnine for your IDE now