protected boolean isPigLoaded() {
  try {
    Class.forName("org.apache.pig.impl.logicalLayer.schema.Schema");
    return true;
  } catch (ClassNotFoundException e) {
    LOG.info("Pig is not loaded; Pig metadata will not be written");
    return false;
  }
}
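// A hypothetical call site for the capability check above (writePigMetadata
// and extraMetaData are illustrative names, not from the original code):
// Pig-specific key/value metadata is only attached when Pig is on the classpath.
if (isPigLoaded()) {
  writePigMetadata(extraMetaData);
}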
private void log(Object o) {
  LOG.info(o);
}
private static <T> List<T> runAllInParallel(int parallelism, List<Callable<T>> toRun) throws ExecutionException {
  LOG.info("Initiating action with parallelism: " + parallelism);
  ExecutorService threadPool = Executors.newFixedThreadPool(parallelism);
  try {
    List<Future<T>> futures = new ArrayList<Future<T>>();
    for (Callable<T> callable : toRun) {
      futures.add(threadPool.submit(callable));
    }
    List<T> result = new ArrayList<T>(toRun.size());
    for (Future<T> future : futures) {
      try {
        result.add(future.get());
      } catch (InterruptedException e) {
        throw new RuntimeException("The thread was interrupted", e);
      }
    }
    return result;
  } finally {
    threadPool.shutdownNow();
  }
}
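// A minimal usage sketch for the helper above, under assumed names (paths,
// conf, readFooter are stand-ins for the caller's context): each per-file
// task becomes a Callable, and results come back in submission order.
List<Callable<ParquetMetadata>> tasks = new ArrayList<Callable<ParquetMetadata>>();
for (final Path path : paths) {
  tasks.add(new Callable<ParquetMetadata>() {
    @Override
    public ParquetMetadata call() throws Exception {
      return readFooter(conf, path); // assumed per-file work
    }
  });
}
List<ParquetMetadata> footers = runAllInParallel(5, tasks); // 5 threads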
private static List<FileStatus> getAllFileRecursively(
    List<FileStatus> files, Configuration conf) throws IOException {
  List<FileStatus> result = new ArrayList<FileStatus>();
  for (FileStatus file : files) {
    if (file.isDir()) {
      Path p = file.getPath();
      FileSystem fs = p.getFileSystem(conf);
      staticAddInputPathRecursively(result, fs, p, HiddenFileFilter.INSTANCE);
    } else {
      result.add(file);
    }
  }
  LOG.info("Total input paths to process: " + result.size());
  return result;
}
/**
 * Given a FilterPredicate, return a Filter that wraps it.
 * This method also logs the filter being used and rewrites
 * the predicate to not include the not() operator.
 */
public static Filter get(FilterPredicate filterPredicate) {
  checkNotNull(filterPredicate, "filterPredicate");
  LOG.info("Filtering using predicate: " + filterPredicate);

  // rewrite the predicate to not include the not() operator
  FilterPredicate collapsedPredicate = LogicalInverseRewriter.rewrite(filterPredicate);

  if (!filterPredicate.equals(collapsedPredicate)) {
    LOG.info("Predicate has been collapsed to: " + collapsedPredicate);
  }

  return new FilterPredicateCompat(collapsedPredicate);
}
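// A small sketch of the collapse this method performs, assuming the filter2
// FilterApi (intColumn, eq, not) from the same codebase is statically imported:
// LogicalInverseRewriter turns not(eq(x, 10)) into the equivalent notEq(x, 10),
// so downstream evaluators never have to handle a not() node.
FilterPredicate pred = not(eq(intColumn("x"), 10));
Filter filter = get(pred); // logs the original and the collapsed predicate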
static ParquetMetadata readSummaryMetadata(Configuration configuration, Path basePath, boolean skipRowGroups) throws IOException {
  Path metadataFile = new Path(basePath, PARQUET_METADATA_FILE);
  Path commonMetaDataFile = new Path(basePath, PARQUET_COMMON_METADATA_FILE);
  FileSystem fileSystem = basePath.getFileSystem(configuration);
  if (skipRowGroups && fileSystem.exists(commonMetaDataFile)) {
    // reading the summary file that does not contain the row groups
    if (Log.INFO) LOG.info("reading summary file: " + commonMetaDataFile);
    return readFooter(configuration, commonMetaDataFile, filter(skipRowGroups));
  } else if (fileSystem.exists(metadataFile)) {
    if (Log.INFO) LOG.info("reading summary file: " + metadataFile);
    return readFooter(configuration, metadataFile, filter(skipRowGroups));
  } else {
    return null;
  }
}
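// Lookup order sketch for the summary files probed above (the file names shown
// are the conventional values of the two constants in parquet-mr):
//
//   basePath/_common_metadata   schema and key/value metadata only;
//                               preferred when skipRowGroups is true
//   basePath/_metadata          full footer, including row group information
//
// When neither file exists, the method returns null and the caller falls back
// to reading the individual file footers.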
protected PageHeader readPageHeader() throws IOException {
  PageHeader pageHeader;
  int initialPos = this.pos;
  try {
    pageHeader = Util.readPageHeader(this);
  } catch (IOException e) {
    // this is to work around a bug where the compressedLength of the chunk
    // is missing the size of the dictionary page header. To allow reading
    // older files (that use a dictionary) we need this: usually 13 to 19
    // bytes are missing, and if the last page is smaller than that, the
    // page header itself is truncated in the buffer.
    this.pos = initialPos; // reset the buffer to the position before the error
    LOG.info("completing the column chunk to read the page header");
    // try again from the buffer concatenated with the remainder of the stream
    pageHeader = Util.readPageHeader(new SequenceInputStream(this, f));
  }
  return pageHeader;
}
private void checkBlockSizeReached() throws IOException {
  if (recordCount >= recordCountForNextMemCheck) {
    // checking the memory size is relatively expensive, so let's not do it for every record.
    long memSize = columnStore.getBufferedSize();
    if (memSize > rowGroupSizeThreshold) {
      LOG.info(format("mem size %,d > %,d: flushing %,d records to disk.", memSize, rowGroupSizeThreshold, recordCount));
      flushRowGroupToStore();
      initStore();
      recordCountForNextMemCheck = min(max(MINIMUM_RECORD_COUNT_FOR_CHECK, recordCount / 2), MAXIMUM_RECORD_COUNT_FOR_CHECK);
    } else {
      float recordSize = (float) memSize / recordCount;
      recordCountForNextMemCheck = min(
          max(MINIMUM_RECORD_COUNT_FOR_CHECK, (recordCount + (long) (rowGroupSizeThreshold / recordSize)) / 2), // will check halfway
          recordCount + MAXIMUM_RECORD_COUNT_FOR_CHECK // will not look more than max records ahead
          );
      if (DEBUG) LOG.debug(format("checked mem at %,d; will check again at %,d", recordCount, recordCountForNextMemCheck));
    }
  }
}
private void checkBlockSizeReached() throws IOException {
  if (recordCount >= recordCountForNextMemCheck) {
    // checking the memory size is relatively expensive, so let's not do it for every record.
    long memSize = store.memSize();
    if (memSize > blockSize) {
      LOG.info(format("mem size %,d > %,d: flushing %,d records to disk.", memSize, blockSize, recordCount));
      flushStore();
      initStore();
      recordCountForNextMemCheck = min(max(MINIMUM_RECORD_COUNT_FOR_CHECK, recordCount / 2), MAXIMUM_RECORD_COUNT_FOR_CHECK);
    } else {
      float recordSize = (float) memSize / recordCount;
      recordCountForNextMemCheck = min(
          max(MINIMUM_RECORD_COUNT_FOR_CHECK, (recordCount + (long) (blockSize / recordSize)) / 2), // will check halfway
          recordCount + MAXIMUM_RECORD_COUNT_FOR_CHECK // will not look more than max records ahead
          );
      if (DEBUG) LOG.debug(format("checked mem at %,d; will check again at %,d", recordCount, recordCountForNextMemCheck));
    }
  }
}
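// A worked example of the halfway heuristic above, with illustrative values;
// the actual MINIMUM/MAXIMUM_RECORD_COUNT_FOR_CHECK constants (100 and 10000
// here) are assumptions, not taken from this snippet.
long recordCount = 10000;                // records written so far
long memSize = 16L << 20;                // 16 MB buffered in the store
long blockSize = 128L << 20;             // 128 MB target block size
float recordSize = (float) memSize / recordCount;      // ~1,678 B per record
long projectedTotal = (long) (blockSize / recordSize); // block full at ~80,000 records
long next = Math.min(
    Math.max(100, (recordCount + projectedTotal) / 2), // halfway point: 45,000
    recordCount + 10000);                              // capped: next check at 20,000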
public CompressionCodecName getCodec() {
  CompressionCodecName codec;
  Configuration configuration = getConfiguration();
  if (isParquetCompressionSet(configuration)) { // explicit parquet config
    codec = getParquetCompressionCodec(configuration);
  } else if (isHadoopCompressionSet()) { // from hadoop config
    codec = getHadoopCompressionCodec();
  } else {
    if (INFO) LOG.info("Compression not set; defaulting to UNCOMPRESSED");
    codec = CompressionCodecName.UNCOMPRESSED;
  }
  if (INFO) LOG.info("Compression: " + codec.name());
  return codec;
}
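// A hypothetical job setup illustrating the precedence above; the config keys
// shown ("parquet.compression" and the classic Hadoop output-compression keys)
// are assumptions about the surrounding setup, not taken from this snippet.
Configuration conf = job.getConfiguration();
conf.set("parquet.compression", "SNAPPY");        // parquet config: checked first
conf.setBoolean("mapred.output.compress", true);  // hadoop config: fallback only
conf.set("mapred.output.compression.codec", "org.apache.hadoop.io.compress.GzipCodec");
// result: SNAPPY wins; without the parquet key, GZIP would be picked up from
// the hadoop config; with neither set, the codec defaults to UNCOMPRESSED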
@Override
public RecordMaterializer<Tuple> prepareForRead(
    Configuration configuration, Map<String, String> keyValueMetaData,
    MessageType fileSchema, ReadContext readContext) {
  MessageType requestedSchema = readContext.getRequestedSchema();
  Schema requestedPigSchema = getPigSchema(configuration);

  if (requestedPigSchema == null) {
    throw new ParquetDecodingException("Missing Pig schema: ParquetLoader sets the schema in the job conf");
  }
  boolean elephantBirdCompatible = configuration.getBoolean(PARQUET_PIG_ELEPHANT_BIRD_COMPATIBLE, false);
  boolean columnIndexAccess = configuration.getBoolean(PARQUET_COLUMN_INDEX_ACCESS, false);
  if (elephantBirdCompatible) {
    LOG.info("Numbers will default to 0 instead of NULL; Boolean will be converted to Int");
  }
  return new TupleRecordMaterializer(requestedSchema, requestedPigSchema, elephantBirdCompatible, columnIndexAccess);
}
private CompressionCodecName getHadoopCompressionCodec() {
  CompressionCodecName codec;
  try {
    // find the right codec
    Class<?> codecClass = getHadoopOutputCompressorClass(CompressionCodecName.UNCOMPRESSED.getHadoopCompressionCodecClass());
    if (INFO) LOG.info("Compression set through hadoop codec: " + codecClass.getName());
    codec = CompressionCodecName.fromCompressionCodec(codecClass);
  } catch (CompressionCodecNotSupportedException e) {
    if (WARN) LOG.warn("codec defined in hadoop config is not supported by parquet [" + e.getCodecClass().getName() + "]; UNCOMPRESSED will be used instead", e);
    codec = CompressionCodecName.UNCOMPRESSED;
  } catch (IllegalArgumentException e) {
    if (WARN) LOG.warn("codec class not found: " + e.getMessage(), e);
    codec = CompressionCodecName.UNCOMPRESSED;
  }
  return codec;
}
public void writeToFileWriter(ParquetFileWriter writer) throws IOException {
  writer.startColumn(path, totalValueCount, compressor.getCodecName());
  if (dictionaryPage != null) {
    writer.writeDictionaryPage(dictionaryPage);
    encodings.add(dictionaryPage.getEncoding());
  }
  writer.writeDataPages(buf, uncompressedLength, compressedLength, totalStatistics, new ArrayList<Encoding>(encodings));
  writer.endColumn();
  if (INFO) {
    LOG.info(
        String.format(
            "written %,dB for %s: %,d values, %,dB raw, %,dB comp, %d pages, encodings: %s",
            buf.size(), path, totalValueCount, uncompressedLength, compressedLength, pageCount, encodings)
        + (dictionaryPage != null ? String.format(
            ", dic { %,d entries, %,dB raw, %,dB comp}",
            dictionaryPage.getDictionarySize(),
            dictionaryPage.getUncompressedSize(),
            dictionaryPage.getBytes().size()) // compressed size of the buffered dictionary bytes
        : ""));
  }
  encodings.clear();
  pageCount = 0;
}
public BytesInput readAsBytesInput(int size) throws IOException {
  if (pos + size > count) {
    // this is to work around a bug where the compressedLength of the chunk
    // is missing the size of the dictionary page header. To allow reading
    // older files (that use a dictionary) we need this: usually 13 to 19
    // bytes are missing.
    int l1 = count - pos; // what is left in the buffer
    int l2 = size - l1;   // what must be read from the underlying stream
    LOG.info("completed the column chunk with " + l2 + " bytes");
    return BytesInput.concat(super.readAsBytesInput(l1), BytesInput.copy(BytesInput.from(f, l2)));
  }
  return super.readAsBytesInput(size);
}
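// Layout of the workaround above: the buffered chunk ends l1 bytes short of
// the requested size, so the remaining l2 bytes are read straight from the
// underlying stream f and the two pieces are concatenated:
//
//   |<------------- size ------------->|
//   |<--- l1 (buffer) --->|<--- l2 --->|   l2 comes from f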
public void writeToFileWriter(ParquetFileWriter writer) throws IOException {
  writer.startColumn(path, totalValueCount, compressor.getCodecName());
  if (dictionaryPage != null) {
    writer.writeDictionaryPage(dictionaryPage);
    encodings.add(dictionaryPage.getEncoding());
  }
  writer.writeDataPages(BytesInput.from(buf), uncompressedLength, compressedLength, totalStatistics, new ArrayList<Encoding>(encodings));
  writer.endColumn();
  if (INFO) {
    LOG.info(
        String.format(
            "written %,dB for %s: %,d values, %,dB raw, %,dB comp, %d pages, encodings: %s",
            buf.size(), path, totalValueCount, uncompressedLength, compressedLength, pageCount, encodings)
        + (dictionaryPage != null ? String.format(
            ", dic { %,d entries, %,dB raw, %,dB comp}",
            dictionaryPage.getDictionarySize(),
            dictionaryPage.getUncompressedSize(),
            dictionaryPage.getBytes().size()) // compressed size of the buffered dictionary bytes
        : ""));
  }
  encodings.clear();
  pageCount = 0;
}
public void initialize(MessageType requestedSchema, MessageType fileSchema,
    Map<String, String> extraMetadata, Map<String, String> readSupportMetadata,
    Path file, List<BlockMetaData> blocks, Configuration configuration)
    throws IOException {
  this.requestedSchema = requestedSchema;
  this.fileSchema = fileSchema;
  this.file = file;
  this.columnCount = this.requestedSchema.getPaths().size();
  this.recordConverter = readSupport.prepareForRead(
      configuration, extraMetadata, fileSchema,
      new ReadSupport.ReadContext(requestedSchema, readSupportMetadata));

  List<ColumnDescriptor> columns = requestedSchema.getColumns();
  reader = new ParquetFileReader(configuration, file, blocks, columns);
  for (BlockMetaData block : blocks) {
    total += block.getRowCount();
  }
  LOG.info("RecordReader initialized; will read a total of " + total + " records.");
}
private void flushRowGroupToStore() throws IOException {
  LOG.info(format("Flushing mem columnStore to file. allocated memory: %,d", columnStore.getAllocatedSize()));
  if (columnStore.getAllocatedSize() > 3 * (long) rowGroupSizeThreshold) {
    LOG.warn("Too much memory used: " + columnStore.memUsageString());
  }
  if (recordCount > 0) {
    parquetFileWriter.startBlock(recordCount);
    columnStore.flush();
    pageStore.flushToFileWriter(parquetFileWriter);
    recordCount = 0;
    parquetFileWriter.endBlock();
  }
  columnStore = null;
  pageStore = null;
}
private void flushStore() throws IOException {
  LOG.info(format("Flushing mem store to file. allocated memory: %,d", store.allocatedSize()));
  if (store.allocatedSize() > 3 * (long) blockSize) {
    LOG.warn("Too much memory used: " + store.memUsageString());
  }
  w.startBlock(recordCount);
  store.flush();
  pageStore.flushToFileWriter(w);
  recordCount = 0;
  w.endBlock();
  store = null;
  pageStore = null;
}
public void initialize(MessageType fileSchema, Map<String, String> fileMetadata,
    Path file, List<BlockMetaData> blocks, Configuration configuration)
    throws IOException {
  // initialize a ReadContext for this file
  ReadSupport.ReadContext readContext = readSupport.init(new InitContext(
      configuration, toSetMultiMap(fileMetadata), fileSchema));
  this.requestedSchema = readContext.getRequestedSchema();
  this.fileSchema = fileSchema;
  this.file = file;
  this.columnCount = requestedSchema.getPaths().size();
  this.recordConverter = readSupport.prepareForRead(
      configuration, fileMetadata, fileSchema, readContext);
  this.strictTypeChecking = configuration.getBoolean(STRICT_TYPE_CHECKING, true);

  List<ColumnDescriptor> columns = requestedSchema.getColumns();
  reader = new ParquetFileReader(configuration, file, blocks, columns);
  for (BlockMetaData block : blocks) {
    total += block.getRowCount();
  }
  LOG.info("RecordReader initialized; will read a total of " + total + " records.");
}
/**
 * end a column (once all rep, def and data have been written)
 * @throws IOException
 */
public void endColumn() throws IOException {
  state = state.endColumn();
  if (DEBUG) LOG.debug(out.getPos() + ": end column");
  currentBlock.addColumn(ColumnChunkMetaData.get(
      currentChunkPath,
      currentChunkType,
      currentChunkCodec,
      currentEncodings,
      currentStatistics,
      currentChunkFirstDataPage,
      currentChunkDictionaryPageOffset,
      currentChunkValueCount,
      compressedLength,
      uncompressedLength));
  if (DEBUG) LOG.debug("ended column chunk: " + currentColumn);
  currentColumn = null;
  this.currentBlock.setTotalByteSize(currentBlock.getTotalByteSize() + uncompressedLength);
  this.uncompressedLength = 0;
  this.compressedLength = 0;
}