CombineHiveInputSplit csplit = new CombineHiveInputSplit(job, is, pathToPartitionInfo);
result.add(csplit);
/**
 * Writable interface.
 */
@Override
public void write(DataOutput out) throws IOException {
  inputSplitShim.write(out);
  if (inputFormatClassName == null) {
    if (pathToPartitionInfo == null) {
      pathToPartitionInfo = Utilities.getMapWork(getJob()).getPathToPartitionInfo();
    }
    // Look up the partition of the first chunk; all chunks combined into this
    // split are expected to share the same input format class.
    PartitionDesc part = HiveFileFormatUtils.getPartitionDescFromPathRecursively(
        pathToPartitionInfo, inputSplitShim.getPath(0),
        IOPrepareCache.get().getPartitionDescMap());
    // Cache the class name so it is serialized along with the split.
    inputFormatClassName = part.getInputFileFormatClass().getName();
  }
  out.writeUTF(inputFormatClassName);
}
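// write() above serializes the wrapped shim split followed by the input format
// class name as UTF, so deserialization must read the two fields back in the
// same order. A minimal sketch of that counterpart, assuming the same field
// names; the actual Hive readFields() may differ.
@Override
public void readFields(DataInput in) throws IOException {
  inputSplitShim.readFields(in);       // restore the wrapped combine split
  inputFormatClassName = in.readUTF(); // restore the class name written above
}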
private PartitionDesc extractSinglePartSpec(CombineHiveInputSplit hsplit) throws IOException {
  PartitionDesc part = null;
  Map<Map<Path, PartitionDesc>, Map<Path, PartitionDesc>> cache = new HashMap<>();
  for (Path path : hsplit.getPaths()) {
    PartitionDesc otherPart = HiveFileFormatUtils.getPartitionDescFromPathRecursively(
        pathToPartInfo, path, cache);
    LOG.debug("Found spec for " + path + " " + otherPart + " from " + pathToPartInfo);
    if (part == null) {
      part = otherPart;
    } else if (otherPart != part) {
      // Assume we should have the exact same object.
      // TODO: we could also compare the schema and SerDe, and pass only those to the call
      // instead; most of the time these would be the same and LLAP IO can handle that.
      LOG.warn("Multiple partitions found; not going to pass a part spec to LLAP IO: {"
          + part.getPartSpec() + "} and {" + otherPart.getPartSpec() + "}");
      return null;
    }
  }
  return part;
}
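// The identity check above (otherPart != part) relies on the lookup cache
// returning the same PartitionDesc object for every path of one partition.
// A minimal sketch of the value-based relaxation the TODO hints at; this
// helper is hypothetical, not Hive code, and assumes getPartSpec() returns
// a map with a meaningful equals().
private static boolean samePartSpec(PartitionDesc a, PartitionDesc b) {
  return a.getPartSpec().equals(b.getPartSpec());
}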
CombineHiveInputSplit csplit = new CombineHiveInputSplit(job, is);
result.add(csplit);
/**
 * Create a generic Hive RecordReader that can iterate over all chunks in a
 * CombinedFileSplit.
 */
@Override
public RecordReader getRecordReader(InputSplit split, JobConf job,
    Reporter reporter) throws IOException {
  if (!(split instanceof CombineHiveInputSplit)) {
    return super.getRecordReader(split, job, reporter);
  }

  CombineHiveInputSplit hsplit = (CombineHiveInputSplit) split;

  String inputFormatClassName = null;
  Class inputFormatClass = null;
  try {
    inputFormatClassName = hsplit.inputFormatClassName();
    inputFormatClass = job.getClassByName(inputFormatClassName);
  } catch (Exception e) {
    throw new IOException("cannot find class " + inputFormatClassName, e);
  }

  pushProjectionsAndFilters(job, inputFormatClass,
      hsplit.getPath(0).toString(), hsplit.getPath(0).toUri().getPath());

  return ShimLoader.getHadoopShims().getCombineFileInputFormat()
      .getRecordReader(job, hsplit.getInputSplitShim(), reporter,
          CombineHiveRecordReader.class);
}
/**
 * Create a generic Hive RecordReader that can iterate over all chunks in a
 * CombinedFileSplit.
 */
@Override
public RecordReader getRecordReader(InputSplit split, JobConf job,
    Reporter reporter) throws IOException {
  if (!(split instanceof CombineHiveInputSplit)) {
    return super.getRecordReader(split, job, reporter);
  }

  CombineHiveInputSplit hsplit = (CombineHiveInputSplit) split;

  String inputFormatClassName = null;
  Class inputFormatClass = null;
  try {
    inputFormatClassName = hsplit.inputFormatClassName();
    inputFormatClass = job.getClassByName(inputFormatClassName);
  } catch (Exception e) {
    throw new IOException("cannot find class " + inputFormatClassName, e);
  }

  pushProjectionsAndFilters(job, inputFormatClass,
      hsplit.getPath(0).toString(), hsplit.getPath(0).toUri().getPath());

  return ShimLoader.getHadoopShims().getCombineFileInputFormat()
      .getRecordReader(job, (CombineFileSplit) split, reporter,
          CombineHiveRecordReader.class);
}
public CombineHiveRecordReader(InputSplit split, Configuration conf,
    Reporter reporter, Integer partition) throws IOException {
  super((JobConf) conf);
  CombineHiveInputSplit hsplit = split instanceof CombineHiveInputSplit
      ? (CombineHiveInputSplit) split
      : new CombineHiveInputSplit(jobConf, (CombineFileSplit) split);
  String inputFormatClassName = hsplit.inputFormatClassName();
  Class inputFormatClass = null;
  try {
    inputFormatClass = JavaUtils.loadClass(inputFormatClassName);
  } catch (ClassNotFoundException e) {
    throw new IOException("CombineHiveRecordReader: class not found "
        + inputFormatClassName, e);
  }
  InputFormat inputFormat = HiveInputFormat.getInputFormatFromCache(inputFormatClass, jobConf);

  // Create a FileSplit for the one chunk of the combined split that this reader handles.
  FileSplit fsplit = new FileSplit(hsplit.getPaths()[partition],
      hsplit.getStartOffsets()[partition], hsplit.getLengths()[partition],
      hsplit.getLocations());

  this.setRecordReader(inputFormat.getRecordReader(fsplit, jobConf, reporter));
  this.initIOContext(fsplit, jobConf, inputFormatClass, this.recordReader);
}
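// Context for the four-argument constructor above: the shim's combine-file
// record reader is understood to instantiate one chunk reader per path via
// reflection, passing the chunk index as the trailing Integer. The sketch
// below is illustrative only, with the signature assumed from the constructor
// above; it is not the shim's actual code.
import java.lang.reflect.Constructor;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.Reporter;

final class ChunkReaderFactory {
  static CombineHiveRecordReader newChunkReader(InputSplit split, Configuration conf,
      Reporter reporter, int idx) throws Exception {
    Constructor<CombineHiveRecordReader> ctor = CombineHiveRecordReader.class
        .getConstructor(InputSplit.class, Configuration.class, Reporter.class, Integer.class);
    return ctor.newInstance(split, conf, reporter, idx); // idx autoboxes to Integer
  }
}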
public CombineHiveRecordReader(InputSplit split, Configuration conf,
    Reporter reporter, Integer partition) throws IOException {
  JobConf job = (JobConf) conf;
  CombineHiveInputSplit hsplit = new CombineHiveInputSplit(job, (InputSplitShim) split);
  String inputFormatClassName = hsplit.inputFormatClassName();
  Class inputFormatClass = null;
  try {
    inputFormatClass = Class.forName(inputFormatClassName);
  } catch (ClassNotFoundException e) {
    throw new IOException("CombineHiveRecordReader: class not found "
        + inputFormatClassName, e);
  }
  InputFormat inputFormat = HiveInputFormat.getInputFormatFromCache(inputFormatClass, job);

  // Create a FileSplit for the one chunk of the combined split that this reader handles.
  FileSplit fsplit = new FileSplit(hsplit.getPaths()[partition],
      hsplit.getStartOffsets()[partition], hsplit.getLengths()[partition],
      hsplit.getLocations());

  this.recordReader = inputFormat.getRecordReader(fsplit, job, reporter);
  this.initIOContext(fsplit, job, inputFormatClass, this.recordReader);
}
/**
 * Writable interface.
 */
public void write(DataOutput out) throws IOException {
  inputSplitShim.write(out);
  if (inputFormatClassName == null) {
    Map<String, PartitionDesc> pathToPartitionInfo =
        Utilities.getMapRedWork(getJob()).getPathToPartitionInfo();
    // Look up the partition of the first chunk; all chunks combined into this
    // split are expected to share the same input format class.
    PartitionDesc part = HiveFileFormatUtils.getPartitionDescFromPathRecursively(
        pathToPartitionInfo, inputSplitShim.getPath(0),
        IOPrepareCache.get().getPartitionDescMap());
    // Cache the class name so it is serialized along with the split.
    inputFormatClassName = part.getInputFileFormatClass().getName();
  }
  out.writeUTF(inputFormatClassName);
}
CombineHiveInputFormat.CombineHiveInputSplit combineSplit =
    (CombineHiveInputFormat.CombineHiveInputSplit) splits[2];
assertEquals(BUCKETS, combineSplit.getNumPaths());
for (int bucket = 0; bucket < BUCKETS; ++bucket) {
  assertEquals("mock:/combinationAcid/p=1/00000" + bucket + "_0",
      combineSplit.getPath(bucket).toString());
  assertEquals(0, combineSplit.getOffset(bucket));
  assertEquals(251, combineSplit.getLength(bucket));
  String[] hosts = combineSplit.getLocations();
  assertEquals(2, hosts.length);
}
assertEquals(2, split.getNumPaths());
assertEquals(partDir.toString() + "/0_0", split.getPath(0).toString());
assertEquals(partDir.toString() + "/1_0", split.getPath(1).toString());
assertEquals(length0, split.getLength(0));
assertEquals(getLength(path1, conf), split.getLength(1));
assertEquals(0, split.getOffset(0));
assertEquals(0, split.getOffset(1));
assertTrue(3 >= split.getLocations().length);
CombineHiveInputSplit hsplit = split instanceof CombineHiveInputSplit
    ? (CombineHiveInputSplit) split
    : new CombineHiveInputSplit(jobConf, (CombineFileSplit) split);
String inputFormatClassName = hsplit.inputFormatClassName();
Class inputFormatClass = null;
try {
  FileSplit fsplit = new FileSplit(hsplit.getPaths()[partition],
      hsplit.getStartOffsets()[partition], hsplit.getLengths()[partition],
      hsplit.getLocations());
  // Reuse the previous reader's footer buffer when consecutive chunks come
  // from the same file.
  if (partition != 0
      && hsplit.getPaths()[partition - 1].equals(hsplit.getPaths()[partition])) {
    this.setFooterBuffer(((CombineHiveRecordReader) preReader).getFooterBuffer());
  }
/**
 * Create a generic Hive RecordReader that can iterate over all chunks in a
 * CombinedFileSplit.
 */
@Override
public RecordReader getRecordReader(InputSplit split, JobConf job,
    Reporter reporter) throws IOException {
  if (!(split instanceof CombineHiveInputSplit)) {
    return super.getRecordReader(split, job, reporter);
  }

  CombineHiveInputSplit hsplit = (CombineHiveInputSplit) split;

  String inputFormatClassName = null;
  Class inputFormatClass = null;
  try {
    inputFormatClassName = hsplit.inputFormatClassName();
    inputFormatClass = job.getClassByName(inputFormatClassName);
  } catch (Exception e) {
    throw new IOException("cannot find class " + inputFormatClassName, e);
  }

  pushProjectionsAndFilters(job, inputFormatClass, hsplit.getPath(0));

  return ShimLoader.getHadoopShims().getCombineFileInputFormat()
      .getRecordReader(job, (CombineFileSplit) split, reporter,
          CombineHiveRecordReader.class);
}
/**
 * Writable interface.
 */
@Override
public void write(DataOutput out) throws IOException {
  inputSplitShim.write(out);
  if (inputFormatClassName == null) {
    if (pathToPartitionInfo == null) {
      pathToPartitionInfo = Utilities.getMapWork(getJob()).getPathToPartitionInfo();
    }
    // Look up the partition of the first chunk; all chunks combined into this
    // split are expected to share the same input format class.
    PartitionDesc part = HiveFileFormatUtils.getFromPathRecursively(pathToPartitionInfo,
        inputSplitShim.getPath(0), IOPrepareCache.get().getPartitionDescMap());
    // Cache the class name so it is serialized along with the split.
    inputFormatClassName = part.getInputFileFormatClass().getName();
  }
  out.writeUTF(inputFormatClassName);
}
private PartitionDesc extractSinglePartSpec(CombineHiveInputSplit hsplit) throws IOException {
  PartitionDesc part = null;
  Map<Map<Path, PartitionDesc>, Map<Path, PartitionDesc>> cache = new HashMap<>();
  for (Path path : hsplit.getPaths()) {
    PartitionDesc otherPart = HiveFileFormatUtils.getFromPathRecursively(
        pathToPartInfo, path, cache);
    LOG.debug("Found spec for " + path + " " + otherPart + " from " + pathToPartInfo);
    if (part == null) {
      part = otherPart;
    } else if (otherPart != part) {
      // Assume we should have the exact same object.
      // TODO: we could also compare the schema and SerDe, and pass only those to the call
      // instead; most of the time these would be the same and LLAP IO can handle that.
      LOG.warn("Multiple partitions found; not going to pass a part spec to LLAP IO: {"
          + part.getPartSpec() + "} and {" + otherPart.getPartSpec() + "}");
      return null;
    }
  }
  return part;
}
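// Illustrative caller of extractSinglePartSpec() (the names below are
// hypothetical, not Hive identifiers): a non-null result means the whole
// combined split lies in a single partition, so one schema/SerDe can be
// forwarded to LLAP IO; null means mixed partitions.
PartitionDesc part = extractSinglePartSpec(hsplit);
if (part != null) {
  ioContext.setPartSpec(part); // hypothetical: hand a single spec to LLAP IO
} else {
  // Mixed partitions: fall back to resolving schema/SerDe per chunk.
}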