/**
 * {@link VectorizedOrcAcidRowBatchReader} is always used for vectorized reads of acid tables.
 * In some cases this cannot be used from the LLAP IO elevator because
 * {@link RecordReader#getRowNumber()} is not (currently) available there but is required to
 * generate ROW__IDs for "original" files.
 * @param hasDeletes - if there are any deletes that apply to this split
 * todo: HIVE-17944
 */
static boolean canUseLlapForAcid(OrcSplit split, boolean hasDeletes, Configuration conf) {
  if (!split.isOriginal()) {
    return true;
  }
  VectorizedRowBatchCtx rbCtx = Utilities.getVectorizedRowBatchCtx(conf);
  if (rbCtx == null) {
    throw new IllegalStateException("Could not create VectorizedRowBatchCtx for " + split.getPath());
  }
  return !needSyntheticRowIds(split.isOriginal(), hasDeletes, areRowIdsProjected(rbCtx));
}
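// Illustrative sketch, not part of the source above: how a caller might combine
// canUseLlapForAcid() with the vectorized reader. The useLlapElevator(...) helper and the
// exact VectorizedOrcAcidRowBatchReader constructor shape are assumptions for illustration only.
org.apache.hadoop.mapred.RecordReader<NullWritable, VectorizedRowBatch> chooseAcidReader(
    OrcSplit split, boolean hasDeletes, JobConf conf, Reporter reporter) throws IOException {
  if (canUseLlapForAcid(split, hasDeletes, conf)) {
    // LLAP IO can serve this split; no synthetic ROW__IDs are needed.
    return useLlapElevator(split, conf, reporter);   // hypothetical LLAP IO hook
  }
  // "Original" files that need synthetic ROW__IDs (or have deletes) take the regular path.
  return new VectorizedOrcAcidRowBatchReader(split, conf, reporter);   // constructor shape assumed
}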
private static Path[] getDeleteDeltaDirsFromSplit(OrcSplit orcSplit) throws IOException {
  Path path = orcSplit.getPath();
  Path root;
  if (orcSplit.hasBase()) {
    if (orcSplit.isOriginal()) {
      root = path.getParent();
    } else {
      root = path.getParent().getParent();
    }
  } else {
    root = path;
  }
  return AcidUtils.deserializeDeleteDeltas(root, orcSplit.getDeltas());
}
/**
 * Returns whether it is possible to create a valid instance of this class for a given split.
 * @param conf is the job configuration
 * @param inputSplit the split to check
 * @return true if it is possible, else false.
 */
public static boolean canCreateVectorizedAcidRowBatchReaderOnSplit(JobConf conf, InputSplit inputSplit) {
  if (!(inputSplit instanceof OrcSplit)) {
    return false; // must be an instance of OrcSplit.
  }
  // First check if we are reading any original files in the split.
  // To simplify the vectorization logic, the vectorized acid row batch reader does not handle
  // original files for now as they have a different schema than a regular ACID file.
  final OrcSplit split = (OrcSplit) inputSplit;
  if (AcidUtils.getAcidOperationalProperties(conf).isSplitUpdate() && !split.isOriginal()) {
    // When split-update is turned on for ACID, a more optimized vectorized batch reader
    // can be created. But still only possible when we are *NOT* reading any originals.
    return true;
  }
  return false; // no split-update or possibly reading originals!
}
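// Illustrative sketch (assumption, not from the source above): the check is typically consulted
// before constructing the reader, with a fall-back to the non-vectorized ACID path when a
// vectorized reader cannot be created for the split. createRowModeAcidReader(...) is hypothetical.
org.apache.hadoop.mapred.RecordReader<?, ?> createAcidReader(JobConf conf, InputSplit inputSplit,
    Reporter reporter) throws IOException {
  if (VectorizedOrcAcidRowBatchReader.canCreateVectorizedAcidRowBatchReaderOnSplit(conf, inputSplit)) {
    return new VectorizedOrcAcidRowBatchReader(inputSplit, conf, reporter);   // constructor shape assumed
  }
  return createRowModeAcidReader(conf, inputSplit, reporter);   // hypothetical fallback helper
}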
static Path[] getDeleteDeltaDirsFromSplit(OrcSplit orcSplit) throws IOException {
  Path path = orcSplit.getPath();
  Path root;
  if (orcSplit.hasBase()) {
    if (orcSplit.isOriginal()) {
      root = orcSplit.getRootDir();
    } else {
      root = path.getParent().getParent(); //todo: why not just use getRootDir()?
      assert root.equals(orcSplit.getRootDir()) : "root mismatch: baseDir=" + orcSplit.getRootDir() +
          " path.p.p=" + root;
    }
  } else {
    throw new IllegalStateException("Split w/o base w/Acid 2.0??: " + path);
  }
  return AcidUtils.deserializeDeleteDeltas(root, orcSplit.getDeltas());
}
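// Layout note (illustrative, not from the source): a non-original ACID file lives two levels
// below the table/partition root, e.g. /warehouse/t/delta_0000001_0000001_0000/bucket_00000,
// so path.getParent() is the delta (or base) directory and path.getParent().getParent() is the
// root; that is why the assert expects it to equal getRootDir(). For an "original" file such as
// /warehouse/t/000000_0 the root is simply the file's parent directory.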
public OrcNewSplit(OrcSplit inner) throws IOException {
  super(inner.getPath(), inner.getStart(), inner.getLength(), inner.getLocations());
  this.orcTail = inner.getOrcTail();
  this.hasFooter = inner.hasFooter();
  this.isOriginal = inner.isOriginal();
  this.hasBase = inner.hasBase();
  this.deltas.addAll(inner.getDeltas());
}
SortMergedDeleteEventRegistry(JobConf conf, OrcSplit orcSplit, Reader.Options readerOptions) throws IOException {
  final Path[] deleteDeltas = getDeleteDeltaDirsFromSplit(orcSplit);
  if (deleteDeltas.length > 0) {
    int bucket = AcidUtils.parseBucketId(orcSplit.getPath());
    String txnString = conf.get(ValidWriteIdList.VALID_WRITEIDS_KEY);
    this.validWriteIdList = (txnString == null) ?
        new ValidReaderWriteIdList() : new ValidReaderWriteIdList(txnString);
    LOG.debug("Using SortMergedDeleteEventRegistry");
    OrcRawRecordMerger.Options mergerOptions = new OrcRawRecordMerger.Options().isDeleteReader(true);
    assert !orcSplit.isOriginal() : "If this now supports Original splits, set up mergeOptions properly";
    this.deleteRecords = new OrcRawRecordMerger(conf, true, null, false, bucket, validWriteIdList,
        readerOptions, deleteDeltas, mergerOptions);
    this.deleteRecordKey = new OrcRawRecordMerger.ReaderKey();
    this.deleteRecordValue = this.deleteRecords.createValue();
    // Initialize the first value in the delete reader.
    this.isDeleteRecordAvailable = this.deleteRecords.next(deleteRecordKey, deleteRecordValue);
  } else {
    this.isDeleteRecordAvailable = false;
    this.deleteRecordKey = null;
    this.deleteRecordValue = null;
    this.deleteRecords = null;
  }
}
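// Illustrative sketch (assumption): how the one-record look-ahead kept in deleteRecordKey /
// deleteRecordValue can be consumed in sort-merge fashion while scanning data rows. The
// isDeleted(...) helper name is hypothetical; compareRow(...) is assumed to compare only the
// row identity (write id, bucket, row id), ignoring the current transaction.
private boolean isDeleted(RecordIdentifier rowId) throws IOException {
  // advance the delete stream until it is at or past the current row
  while (isDeleteRecordAvailable && deleteRecordKey.compareRow(rowId) < 0) {
    isDeleteRecordAvailable = deleteRecords.next(deleteRecordKey, deleteRecordValue);
  }
  // the row is deleted if some delete event carries exactly this ROW__ID
  return isDeleteRecordAvailable && deleteRecordKey.compareRow(rowId) == 0;
}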
    OrcFile.readerOptions(conf));
if (orcSplit.isOriginal()) {
if (split.isOriginal()) {
  root = path.getParent();
} else {

        new ValidReadTxnList(txnString);
final OrcRawRecordMerger records = new OrcRawRecordMerger(conf, true, reader, split.isOriginal(),
    bucket, validTxnList, readOptions, deltas);
return new RowReader<OrcStruct>() {
"delta_0000001_0000010_0000/bucket_00000", splits.get(0).getPath().toUri().toString()); assertFalse(splits.get(0).isOriginal());
if (split.isOriginal()) {
  mergerOptions.copyIndex(acidIOOptions.getCopyNumber()).bucketPath(split.getPath());

    new OrcRawRecordMerger(conf, true, reader, split.isOriginal(), bucket,
        validWriteIdList, readOptions, deltas, mergerOptions);
return new RowReader<OrcStruct>() {
isOriginal = orcSplit.isOriginal();
if (isOriginal) {
  recordIdColumnVector = new StructColumnVector(VectorizedRowBatch.DEFAULT_SIZE,
assertEquals("mock:/a/b/000000_0", splits.get(0).getPath().toUri().toString()); assertEquals("mock:/a/b/000000_1", splits.get(1).getPath().toUri().toString()); assertTrue(splits.get(0).isOriginal()); assertTrue(splits.get(1).isOriginal()); assertEquals("mock:/a/base_0000001/bucket_00000", splits.get(0).getPath().toUri().toString()); assertEquals("mock:/a/base_0000001/bucket_00001", splits.get(1).getPath().toUri().toString()); assertFalse(splits.get(0).isOriginal()); assertFalse(splits.get(1).isOriginal()); assertEquals("mock:/a/b/000000_0", splits.get(0).getPath().toUri().toString()); assertEquals("mock:/a/b/000000_1", splits.get(1).getPath().toUri().toString()); assertTrue(splits.get(0).isOriginal()); assertTrue(splits.get(1).isOriginal()); assertEquals(true, splitStrategies.get(1) instanceof OrcInputFormat.ACIDSplitStrategy); splits = ((OrcInputFormat.ACIDSplitStrategy)splitStrategies.get(1)).getSplits(); assertEquals("mock:/a/delta_0000001_0000001_0000/bucket_00000", splits.get(0).getPath().toUri().toString()); assertEquals("mock:/a/delta_0000001_0000001_0000/bucket_00001", splits.get(1).getPath().toUri().toString()); assertFalse(splits.get(0).isOriginal()); assertFalse(splits.get(1).isOriginal()); assertEquals(1, splits.size()); assertEquals("mock:/a/b/000000_0", splits.get(0).getPath().toUri().toString()); assertTrue(splits.get(0).isOriginal()); assertEquals(true, splitStrategies.get(1) instanceof OrcInputFormat.ACIDSplitStrategy); splits = ((OrcInputFormat.ACIDSplitStrategy)splitStrategies.get(1)).getSplits(); assertEquals(1, splits.size()); assertEquals("mock:/a/delta_0000001_0000001_0000/bucket_00000", splits.get(0).getPath().toUri().toString());
assertEquals(root.toUri().toString() + File.separator + "000000_0",
    splits.get(0).getPath().toUri().toString());
assertTrue(splits.get(0).isOriginal());
assertTrue(splits.get(1).isOriginal());
assertTrue(splits.get(2).isOriginal());
"base_10000002/bucket_00001", splits.get(0).getPath().toUri().toString()); assertFalse(splits.get(0).isOriginal());
"delta_0000001_0000001_0000/bucket_00000", splits.get(0).getPath().toUri().toString()); assertFalse(splits.get(0).isOriginal()); assertFalse(splits.get(1).isOriginal()); assertFalse(splits.get(2).isOriginal());
private static boolean hasDeltas(OrcSplit orcSplit) throws IOException {
  final Path path = orcSplit.getPath();
  final Path root;
  // If the split has a base, extract the base file size, bucket and root path info.
  if (orcSplit.hasBase()) {
    if (orcSplit.isOriginal()) {
      root = path.getParent();
    } else {
      root = path.getParent().getParent();
    }
  } else {
    root = path;
  }
  final Path[] deltas = AcidUtils.deserializeDeltas(root, orcSplit.getDeltas());
  return deltas.length > 0;
}
long offset = fSplit.getStart();
long length = fSplit.getLength();
options.schema(fSplit.isOriginal() ? hiveReader.getSchema()
    : hiveReader.getSchema().getChildren().get(TRANS_ROW_COLUMN_INDEX));
options.range(offset, length);

boolean[] include = OrcInputFormat.genIncludedColumns(types, jobConf, fSplit.isOriginal());

if (!fSplit.isOriginal()) {

if (!fSplit.isOriginal()) {
  selectedColNames = ArrayUtils.addAll(new String[]{"row"}, selectedColNames);
}

final ORCScanFilter orcScanFilter = (ORCScanFilter) filter;
final SearchArgument sarg = orcScanFilter.getSarg();
options.searchArgument(sarg, OrcInputFormat.getSargColumnNames(selectedColNames, types,
    options.getInclude(), fSplit.isOriginal()));

hiveBatch = createVectorizedRowBatch(partitionOI, fSplit.isOriginal());
copiers = HiveORCCopiers.createCopiers(projectedColOrdinals, vectors, hiveBatch, fSplit.isOriginal());
public OrcNewSplit(OrcSplit inner) throws IOException {
  super(inner.getPath(), inner.getStart(), inner.getLength(), inner.getLocations());
  this.fileMetaInfo = inner.getFileMetaInfo();
  this.hasFooter = inner.hasFooter();
  this.isOriginal = inner.isOriginal();
  this.hasBase = inner.hasBase();
  this.deltas.addAll(inner.getDeltas());
}
if (split.isOriginal() && split.getDeltas().isEmpty()) {
  if (vectorMode) {
    return createVectorizedReader(inputSplit, conf, reporter);