public void write(T value) throws IOException, InterruptedException {
  writeSupport.write(value);
  ++recordCount;
  checkBlockSizeReached();
}
@Override
public void close() throws IOException {
  try {
    writer.close();
  } catch (InterruptedException e) {
    throw new IOException(e);
  }
}
/**
 * {@inheritDoc}
 */
@Override
public void write(Void key, T value) throws IOException, InterruptedException {
  internalWriter.write(value);
}
private void checkBlockSizeReached() throws IOException {
  // checking the memory size is relatively expensive, so let's not do it for every record.
  if (recordCount >= recordCountForNextMemCheck) {
    long memSize = columnStore.getBufferedSize();
    if (memSize > rowGroupSizeThreshold) {
      LOG.info(format("mem size %,d > %,d: flushing %,d records to disk.", memSize, rowGroupSizeThreshold, recordCount));
      flushRowGroupToStore();
      initStore();
      recordCountForNextMemCheck = min(max(MINIMUM_RECORD_COUNT_FOR_CHECK, recordCount / 2), MAXIMUM_RECORD_COUNT_FOR_CHECK);
    } else {
      float recordSize = (float) memSize / recordCount;
      recordCountForNextMemCheck = min(
          max(MINIMUM_RECORD_COUNT_FOR_CHECK, (recordCount + (long) (rowGroupSizeThreshold / recordSize)) / 2), // will check halfway
          recordCount + MAXIMUM_RECORD_COUNT_FOR_CHECK // will not look more than max records ahead
          );
      if (DEBUG) LOG.debug(format("Checked mem at %,d will check again at: %,d", recordCount, recordCountForNextMemCheck));
    }
  }
}
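// --- Illustrative sketch (not part of this file): the adaptive check-interval
// heuristic from checkBlockSizeReached() in isolation. The constant values here
// are assumptions for the example; the real bounds are defined elsewhere in the
// class. After a check that does not flush, the writer estimates the average
// record size and schedules the next check roughly halfway to the record count
// at which the row group is projected to hit its size threshold.
final class NextCheckSketch {
  private static final long MINIMUM_RECORD_COUNT_FOR_CHECK = 100;   // assumed value
  private static final long MAXIMUM_RECORD_COUNT_FOR_CHECK = 10000; // assumed value

  static long nextCheck(long recordCount, long bufferedBytes, long rowGroupSizeThreshold) {
    float recordSize = (float) bufferedBytes / recordCount;            // average bytes per record so far
    long projectedCount = (long) (rowGroupSizeThreshold / recordSize); // records that fit in one row group
    return Math.min(
        Math.max(MINIMUM_RECORD_COUNT_FOR_CHECK, (recordCount + projectedCount) / 2), // check halfway there
        recordCount + MAXIMUM_RECORD_COUNT_FOR_CHECK);                 // never look too far ahead
  }
}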
int maxColCount = 0;
for (InternalParquetRecordWriter w : writerList.keySet()) {
  maxColCount = Math.max(w.getSchema().getColumns().size(), maxColCount);
}

for (Map.Entry<InternalParquetRecordWriter, Long> entry : writerList.entrySet()) {
  long newSize = (long) Math.floor(entry.getValue() * scale);
  if (scale < 1.0 && minMemoryAllocation > 0 && newSize < minMemoryAllocation) {
    throw new ParquetRuntimeException(String.format("New Memory allocation %d bytes" +
        " is smaller than the minimum allocation size of %d bytes.",
        newSize, minMemoryAllocation)){};
  }
  entry.getKey().setRowGroupSizeThreshold(newSize);
  LOG.debug(String.format("Adjust block size from %,d to %,d for writer: %s",
      entry.getValue(), newSize, entry.getKey()));
}
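// --- Illustrative sketch (not part of this file): the proportional scaling
// rule the MemoryManager applies above, reduced to a single pure function.
// Parameter names are assumptions for the example. When the sum of requested
// row-group sizes exceeds the shared memory pool, every writer's threshold is
// scaled down by the same factor.
static long scaledRowGroupSize(long requestedSize, long totalAllocations, long totalMemoryPool) {
  double scale = (totalAllocations <= totalMemoryPool)
      ? 1.0
      : (double) totalMemoryPool / totalAllocations;
  return (long) Math.floor(requestedSize * scale);
}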
    boolean validating, WriterVersion writerVersion) {
  internalWriter = new InternalParquetRecordWriter<T>(w, writeSupport, schema,
      extraMetaData, blockSize, pageSize, compressor, dictionaryPageSize,
      enableDictionary, validating, writerVersion);
}

    WriterVersion writerVersion, MemoryManager memoryManager) {
  internalWriter = new InternalParquetRecordWriter<T>(w, writeSupport, schema,
      extraMetaData, blockSize, pageSize, compressor, dictionaryPageSize,
      enableDictionary, validating, writerVersion);
public void close() throws IOException, InterruptedException {
  flushRowGroupToStore();
  FinalizedWriteContext finalWriteContext = writeSupport.finalizeWrite();
  Map<String, String> finalMetadata = new HashMap<String, String>(extraMetaData);
  finalMetadata.putAll(finalWriteContext.getExtraMetaData());
  parquetFileWriter.end(finalMetadata);
}
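// --- Illustrative sketch (not part of this file): where the extra metadata
// merged in close() above can come from. A WriteSupport implementation may
// override finalizeWrite() to contribute key/value pairs to the file footer
// after the last record; the "record.count" key and the recordsWritten counter
// are invented for this example.
@Override
public FinalizedWriteContext finalizeWrite() {
  Map<String, String> extra = new HashMap<String, String>();
  extra.put("record.count", String.valueOf(recordsWritten)); // counter maintained in write()
  return new FinalizedWriteContext(extra);
}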
this.validating = validating;
this.parquetProperties = new ParquetProperties(dictionaryPageSize, writerVersion, enableDictionary);
initStore();
this.writer = new InternalParquetRecordWriter<T>(
    fileWriter,
    writeSupport,
public void write(T object) throws IOException {
  try {
    writer.write(object);
  } catch (InterruptedException e) {
    throw new IOException(e);
  }
}
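// --- Illustrative caller sketch (not part of this file): because write()
// converts InterruptedException into IOException, callers only need to handle
// IOException. Writer construction is elided; any concrete ParquetWriter<T>
// behaves the same way.
static <T> void writeAll(ParquetWriter<T> writer, Iterable<T> records) throws IOException {
  try {
    for (T record : records) {
      writer.write(record);
    }
  } finally {
    writer.close(); // flushes the final row group and writes the footer
  }
}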
/**
 * {@inheritDoc}
 */
@Override
public void close(TaskAttemptContext context) throws IOException, InterruptedException {
  internalWriter.close();
  if (memoryManager != null) {
    memoryManager.removeWriter(internalWriter);
  }
}
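// --- Illustrative sketch (not part of this file): what removeWriter() plausibly
// does on the MemoryManager side, assuming the writerList map and the
// updateAllocation() logic shown earlier. Dropping a closed writer frees its
// allocation, and re-running the allocation lets the remaining writers grow
// their row-group thresholds back.
synchronized void removeWriter(InternalParquetRecordWriter writer) {
  writerList.remove(writer);
  if (!writerList.isEmpty()) {
    updateAllocation();
  }
}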