CombineHiveInputSplit csplit = new CombineHiveInputSplit(job, is, pathToPartitionInfo);
result.add(csplit);
/**
 * Writable interface.
 */
@Override
public void write(DataOutput out) throws IOException {
  inputSplitShim.write(out);
  if (inputFormatClassName == null) {
    if (pathToPartitionInfo == null) {
      pathToPartitionInfo = Utilities.getMapWork(getJob()).getPathToPartitionInfo();
    }
    // Look up the partition of the first chunk; all chunks combined into this
    // split are expected to share the same input format class.
    PartitionDesc part = HiveFileFormatUtils.getPartitionDescFromPathRecursively(
        pathToPartitionInfo, inputSplitShim.getPath(0),
        IOPrepareCache.get().getPartitionDescMap());
    // Cache the class name so it is serialized along with the split.
    inputFormatClassName = part.getInputFileFormatClass().getName();
  }
  out.writeUTF(inputFormatClassName);
}
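// write() above serializes the wrapped shim split followed by the input format
// class name as UTF, so deserialization must read the two fields back in the
// same order. A minimal sketch of that counterpart, assuming the same field
// names; the actual Hive readFields() may differ.
@Override
public void readFields(DataInput in) throws IOException {
  inputSplitShim.readFields(in);       // restore the wrapped combine split
  inputFormatClassName = in.readUTF(); // restore the class name written above
}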
private PartitionDesc extractSinglePartSpec(CombineHiveInputSplit hsplit) throws IOException {
  PartitionDesc part = null;
  Map<Map<Path, PartitionDesc>, Map<Path, PartitionDesc>> cache = new HashMap<>();
  for (Path path : hsplit.getPaths()) {
    PartitionDesc otherPart = HiveFileFormatUtils.getPartitionDescFromPathRecursively(
        pathToPartInfo, path, cache);
    LOG.debug("Found spec for " + path + " " + otherPart + " from " + pathToPartInfo);
    if (part == null) {
      part = otherPart;
    } else if (otherPart != part) {
      // Assume we should have the exact same object.
      // TODO: we could also compare the schema and SerDe, and pass only those to the call
      // instead; most of the time these would be the same and LLAP IO can handle that.
      LOG.warn("Multiple partitions found; not going to pass a part spec to LLAP IO: {"
          + part.getPartSpec() + "} and {" + otherPart.getPartSpec() + "}");
      return null;
    }
  }
  return part;
}
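// The identity check above (otherPart != part) relies on the lookup cache
// returning the same PartitionDesc object for every path of one partition.
// A minimal sketch of the value-based relaxation the TODO hints at; this
// helper is hypothetical, not Hive code, and assumes getPartSpec() returns
// a map with a meaningful equals().
private static boolean samePartSpec(PartitionDesc a, PartitionDesc b) {
  return a.getPartSpec().equals(b.getPartSpec());
}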
CombineHiveInputSplit csplit = new CombineHiveInputSplit(job, is);
result.add(csplit);
/**
 * Create a generic Hive RecordReader that can iterate over all chunks in a
 * CombinedFileSplit.
 */
@Override
public RecordReader getRecordReader(InputSplit split, JobConf job,
    Reporter reporter) throws IOException {
  if (!(split instanceof CombineHiveInputSplit)) {
    return super.getRecordReader(split, job, reporter);
  }

  CombineHiveInputSplit hsplit = (CombineHiveInputSplit) split;

  String inputFormatClassName = null;
  Class inputFormatClass = null;
  try {
    inputFormatClassName = hsplit.inputFormatClassName();
    inputFormatClass = job.getClassByName(inputFormatClassName);
  } catch (Exception e) {
    throw new IOException("cannot find class " + inputFormatClassName, e);
  }

  pushProjectionsAndFilters(job, inputFormatClass,
      hsplit.getPath(0).toString(), hsplit.getPath(0).toUri().getPath());

  return ShimLoader.getHadoopShims().getCombineFileInputFormat()
      .getRecordReader(job, hsplit.getInputSplitShim(), reporter,
          CombineHiveRecordReader.class);
}
/**
 * Create a generic Hive RecordReader that can iterate over all chunks in a
 * CombinedFileSplit.
 */
@Override
public RecordReader getRecordReader(InputSplit split, JobConf job,
    Reporter reporter) throws IOException {
  if (!(split instanceof CombineHiveInputSplit)) {
    return super.getRecordReader(split, job, reporter);
  }

  CombineHiveInputSplit hsplit = (CombineHiveInputSplit) split;

  String inputFormatClassName = null;
  Class inputFormatClass = null;
  try {
    inputFormatClassName = hsplit.inputFormatClassName();
    inputFormatClass = job.getClassByName(inputFormatClassName);
  } catch (Exception e) {
    throw new IOException("cannot find class " + inputFormatClassName, e);
  }

  pushProjectionsAndFilters(job, inputFormatClass,
      hsplit.getPath(0).toString(), hsplit.getPath(0).toUri().getPath());

  return ShimLoader.getHadoopShims().getCombineFileInputFormat()
      .getRecordReader(job, (CombineFileSplit) split, reporter,
          CombineHiveRecordReader.class);
}
public CombineHiveRecordReader(InputSplit split, Configuration conf,
    Reporter reporter, Integer partition) throws IOException {
  super((JobConf) conf);
  CombineHiveInputSplit hsplit = split instanceof CombineHiveInputSplit
      ? (CombineHiveInputSplit) split
      : new CombineHiveInputSplit(jobConf, (CombineFileSplit) split);
  String inputFormatClassName = hsplit.inputFormatClassName();
  Class inputFormatClass = null;
  try {
    inputFormatClass = JavaUtils.loadClass(inputFormatClassName);
  } catch (ClassNotFoundException e) {
    throw new IOException("CombineHiveRecordReader: class not found "
        + inputFormatClassName, e);
  }
  InputFormat inputFormat = HiveInputFormat.getInputFormatFromCache(inputFormatClass, jobConf);

  // Create a FileSplit for the one chunk of the combined split that this reader handles.
  FileSplit fsplit = new FileSplit(hsplit.getPaths()[partition],
      hsplit.getStartOffsets()[partition], hsplit.getLengths()[partition],
      hsplit.getLocations());

  this.setRecordReader(inputFormat.getRecordReader(fsplit, jobConf, reporter));
  this.initIOContext(fsplit, jobConf, inputFormatClass, this.recordReader);
}
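// Context for the four-argument constructor above: the shim's combine-file
// record reader is understood to instantiate one chunk reader per path via
// reflection, passing the chunk index as the trailing Integer. The sketch
// below is illustrative only, with the signature assumed from the constructor
// above; it is not the shim's actual code.
import java.lang.reflect.Constructor;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.Reporter;

final class ChunkReaderFactory {
  static CombineHiveRecordReader newChunkReader(InputSplit split, Configuration conf,
      Reporter reporter, int idx) throws Exception {
    Constructor<CombineHiveRecordReader> ctor = CombineHiveRecordReader.class
        .getConstructor(InputSplit.class, Configuration.class, Reporter.class, Integer.class);
    return ctor.newInstance(split, conf, reporter, idx); // idx autoboxes to Integer
  }
}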
public CombineHiveRecordReader(InputSplit split, Configuration conf,
    Reporter reporter, Integer partition) throws IOException {
  JobConf job = (JobConf) conf;
  CombineHiveInputSplit hsplit = new CombineHiveInputSplit(job, (InputSplitShim) split);
  String inputFormatClassName = hsplit.inputFormatClassName();
  Class inputFormatClass = null;
  try {
    inputFormatClass = Class.forName(inputFormatClassName);
  } catch (ClassNotFoundException e) {
    throw new IOException("CombineHiveRecordReader: class not found "
        + inputFormatClassName, e);
  }
  InputFormat inputFormat = HiveInputFormat.getInputFormatFromCache(inputFormatClass, job);

  // Create a FileSplit for the one chunk of the combined split that this reader handles.
  FileSplit fsplit = new FileSplit(hsplit.getPaths()[partition],
      hsplit.getStartOffsets()[partition], hsplit.getLengths()[partition],
      hsplit.getLocations());

  this.recordReader = inputFormat.getRecordReader(fsplit, job, reporter);
  this.initIOContext(fsplit, job, inputFormatClass, this.recordReader);
}
/**
 * Writable interface.
 */
public void write(DataOutput out) throws IOException {
  inputSplitShim.write(out);
  if (inputFormatClassName == null) {
    Map<String, PartitionDesc> pathToPartitionInfo =
        Utilities.getMapRedWork(getJob()).getPathToPartitionInfo();
    // Look up the partition of the first chunk; all chunks combined into this
    // split are expected to share the same input format class.
    PartitionDesc part = HiveFileFormatUtils.getPartitionDescFromPathRecursively(
        pathToPartitionInfo, inputSplitShim.getPath(0),
        IOPrepareCache.get().getPartitionDescMap());
    // Cache the class name so it is serialized along with the split.
    inputFormatClassName = part.getInputFileFormatClass().getName();
  }
  out.writeUTF(inputFormatClassName);
}
CombineHiveInputFormat.CombineHiveInputSplit combineSplit =
    (CombineHiveInputFormat.CombineHiveInputSplit) splits[2];
assertEquals(BUCKETS, combineSplit.getNumPaths());
for (int bucket = 0; bucket < BUCKETS; ++bucket) {
  assertEquals("mock:/combinationAcid/p=1/00000" + bucket + "_0",
      combineSplit.getPath(bucket).toString());
  assertEquals(0, combineSplit.getOffset(bucket));
  assertEquals(251, combineSplit.getLength(bucket));
  String[] hosts = combineSplit.getLocations();
  assertEquals(2, hosts.length);
}
assertEquals(2, split.getNumPaths());
assertEquals(partDir.toString() + "/0_0", split.getPath(0).toString());
assertEquals(partDir.toString() + "/1_0", split.getPath(1).toString());
assertEquals(length0, split.getLength(0));
assertEquals(getLength(path1, conf), split.getLength(1));
assertEquals(0, split.getOffset(0));
assertEquals(0, split.getOffset(1));
assertTrue(3 >= split.getLocations().length);
CombineHiveInputSplit hsplit = split instanceof CombineHiveInputSplit
    ? (CombineHiveInputSplit) split
    : new CombineHiveInputSplit(jobConf, (CombineFileSplit) split);
String inputFormatClassName = hsplit.inputFormatClassName();
Class inputFormatClass = null;
try {
  FileSplit fsplit = new FileSplit(hsplit.getPaths()[partition],
      hsplit.getStartOffsets()[partition], hsplit.getLengths()[partition],
      hsplit.getLocations());
  // Reuse the previous reader's footer buffer when consecutive chunks come
  // from the same file.
  if (partition != 0
      && hsplit.getPaths()[partition - 1].equals(hsplit.getPaths()[partition])) {
    this.setFooterBuffer(((CombineHiveRecordReader) preReader).getFooterBuffer());
  }
/**
 * Create a generic Hive RecordReader that can iterate over all chunks in a
 * CombinedFileSplit.
 */
@Override
public RecordReader getRecordReader(InputSplit split, JobConf job,
    Reporter reporter) throws IOException {
  if (!(split instanceof CombineHiveInputSplit)) {
    return super.getRecordReader(split, job, reporter);
  }

  CombineHiveInputSplit hsplit = (CombineHiveInputSplit) split;

  String inputFormatClassName = null;
  Class inputFormatClass = null;
  try {
    inputFormatClassName = hsplit.inputFormatClassName();
    inputFormatClass = job.getClassByName(inputFormatClassName);
  } catch (Exception e) {
    throw new IOException("cannot find class " + inputFormatClassName, e);
  }

  pushProjectionsAndFilters(job, inputFormatClass, hsplit.getPath(0));

  return ShimLoader.getHadoopShims().getCombineFileInputFormat()
      .getRecordReader(job, (CombineFileSplit) split, reporter,
          CombineHiveRecordReader.class);
}
/**
 * Writable interface.
 */
@Override
public void write(DataOutput out) throws IOException {
  inputSplitShim.write(out);
  if (inputFormatClassName == null) {
    if (pathToPartitionInfo == null) {
      pathToPartitionInfo = Utilities.getMapWork(getJob()).getPathToPartitionInfo();
    }
    // Look up the partition of the first chunk; all chunks combined into this
    // split are expected to share the same input format class.
    PartitionDesc part = HiveFileFormatUtils.getFromPathRecursively(pathToPartitionInfo,
        inputSplitShim.getPath(0), IOPrepareCache.get().getPartitionDescMap());
    // Cache the class name so it is serialized along with the split.
    inputFormatClassName = part.getInputFileFormatClass().getName();
  }
  out.writeUTF(inputFormatClassName);
}
private PartitionDesc extractSinglePartSpec(CombineHiveInputSplit hsplit) throws IOException {
  PartitionDesc part = null;
  Map<Map<Path, PartitionDesc>, Map<Path, PartitionDesc>> cache = new HashMap<>();
  for (Path path : hsplit.getPaths()) {
    PartitionDesc otherPart = HiveFileFormatUtils.getFromPathRecursively(
        pathToPartInfo, path, cache);
    LOG.debug("Found spec for " + path + " " + otherPart + " from " + pathToPartInfo);
    if (part == null) {
      part = otherPart;
    } else if (otherPart != part) {
      // Assume we should have the exact same object.
      // TODO: we could also compare the schema and SerDe, and pass only those to the call
      // instead; most of the time these would be the same and LLAP IO can handle that.
      LOG.warn("Multiple partitions found; not going to pass a part spec to LLAP IO: {"
          + part.getPartSpec() + "} and {" + otherPart.getPartSpec() + "}");
      return null;
    }
  }
  return part;
}
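// Illustrative caller of extractSinglePartSpec() (the names below are
// hypothetical, not Hive identifiers): a non-null result means the whole
// combined split lies in a single partition, so one schema/SerDe can be
// forwarded to LLAP IO; null means mixed partitions.
PartitionDesc part = extractSinglePartSpec(hsplit);
if (part != null) {
  ioContext.setPartSpec(part); // hypothetical: hand a single spec to LLAP IO
} else {
  // Mixed partitions: fall back to resolving schema/SerDe per chunk.
}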