@Override
public List<InputSplit> getSplits(final JobContext jobContext)
    throws IOException, InterruptedException {
  return this.tableInputFormat.getSplits(jobContext);
}
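This delegating override is the common wrapper pattern: an outer InputFormat holds a configured TableInputFormat and forwards split computation to it. A minimal self-contained sketch of that pattern, with hypothetical class and field names (not any particular project's API):

import java.io.IOException;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableInputFormat;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

// Hypothetical wrapper: owns a TableInputFormat and forwards both
// split computation and record reading to it.
public class DelegatingTableInputFormat
    extends InputFormat<ImmutableBytesWritable, Result> {

  private final TableInputFormat tableInputFormat = new TableInputFormat();

  // TableInputFormat reads its table name and scan from the job
  // configuration, so the wrapper must pass the configuration along.
  public void setConf(Configuration conf) {
    tableInputFormat.setConf(conf);
  }

  @Override
  public List<InputSplit> getSplits(JobContext jobContext)
      throws IOException, InterruptedException {
    return tableInputFormat.getSplits(jobContext);
  }

  @Override
  public RecordReader<ImmutableBytesWritable, Result> createRecordReader(
      InputSplit split, TaskAttemptContext context)
      throws IOException, InterruptedException {
    return tableInputFormat.createRecordReader(split, context);
  }
}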
@Test
public void testNonSuccessiveSplitsAreNotMerged() throws IOException {
  JobContext context = mock(JobContext.class);
  Configuration conf = HBaseConfiguration.create();
  conf.set(ClusterConnection.HBASE_CLIENT_CONNECTION_IMPL,
    ConnectionForMergeTesting.class.getName());
  conf.set(TableInputFormat.INPUT_TABLE, "testTable");
  conf.setBoolean(TableInputFormatBase.MAPREDUCE_INPUT_AUTOBALANCE, true);
  when(context.getConfiguration()).thenReturn(conf);

  TableInputFormat tifExclude = new TableInputFormatForMergeTesting();
  tifExclude.setConf(conf);
  // split["b", "c"] is excluded, split["o", "p"] and split["p", "q"] are merged,
  // but split["a", "b"] and split["c", "d"] are not merged.
  assertEquals(ConnectionForMergeTesting.START_KEYS.length - 1 - 1,
    tifExclude.getSplits(context).size());
}
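Outside of tests, the autobalance switch exercised above is a plain boolean in the job configuration. A minimal driver-side sketch (the table name is a placeholder):

Configuration conf = HBaseConfiguration.create();
conf.set(TableInputFormat.INPUT_TABLE, "myTable"); // placeholder table name
// Let TableInputFormatBase merge undersized adjacent splits (and cut
// oversized ones) when computing input splits.
conf.setBoolean(TableInputFormatBase.MAPREDUCE_INPUT_AUTOBALANCE, true);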
/**
 * Tests the number of InputSplits for an MR job when a number of mappers
 * per region is specified for TableInputFormatXXX.
 * This test does not run the MR job.
 */
protected void testNumOfSplits(int splitsPerRegion, int expectedNumOfSplits)
    throws IOException, InterruptedException, ClassNotFoundException {
  String jobName = "TestJobForNumOfSplits";
  LOG.info("Before map/reduce startup - job " + jobName);
  Configuration c = new Configuration(TEST_UTIL.getConfiguration());
  Scan scan = new Scan();
  scan.addFamily(INPUT_FAMILYS[0]);
  scan.addFamily(INPUT_FAMILYS[1]);
  c.setInt("hbase.mapreduce.tableinput.mappers.per.region", splitsPerRegion);
  c.set(KEY_STARTROW, "");
  c.set(KEY_LASTROW, "");
  Job job = Job.getInstance(c, jobName);
  TableMapReduceUtil.initTableMapperJob(TABLE_NAME.getNameAsString(), scan,
    ScanMapper.class, ImmutableBytesWritable.class, ImmutableBytesWritable.class, job);
  TableInputFormat tif = new TableInputFormat();
  tif.setConf(job.getConfiguration());
  Assert.assertEquals(TABLE_NAME, table.getName());
  List<InputSplit> splits = tif.getSplits(job);
  Assert.assertEquals(expectedNumOfSplits, splits.size());
}
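Driver-side, the same property is the knob that controls how many splits (and hence mappers) each region yields. A minimal sketch, assuming only that the job reads hbase.mapreduce.tableinput.mappers.per.region the way this test does:

Configuration conf = HBaseConfiguration.create();
// Ask TableInputFormat to generate two input splits (two mappers) per region.
conf.setInt("hbase.mapreduce.tableinput.mappers.per.region", 2);
Job job = Job.getInstance(conf, "scan-with-two-mappers-per-region");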
@Override
public List<InputSplit> getSplits(JobContext jobContext) throws IOException {
  // Hadoop.JobContext.getConfiguration appears to be a reflective accessor,
  // presumably for compatibility across Hadoop versions where JobContext's
  // type changed.
  Configuration conf = Hadoop.JobContext.getConfiguration.invoke(jobContext);
  return getDelegate(conf).getSplits(jobContext);
}
@Override
public org.apache.hadoop.mapred.InputSplit[] getSplits(JobConf job, int numSplits)
    throws IOException {
  inputFormat.setConf(job);
  // Bridge the old mapred API to the new mapreduce API: wrap the JobConf in
  // a JobContext, compute the new-style splits, then convert them back.
  return convertSplits(inputFormat.getSplits(
    HCatMapRedUtil.createJobContext(job, null, Reporter.NULL)));
}
@Override
public List<InputSplit> getSplits(JobContext context) throws IOException {
  List<InputSplit> allSplits = new ArrayList<>();
  Scan originalScan = getScan();
  Scan[] scans = rowKeyDistributor.getDistributedScans(originalScan);
  for (Scan scan : scans) {
    // Internally super.getSplits(...) uses the scan object stored in a
    // private field; to reuse the superclass code we temporarily swap in
    // each distributed scan.
    setScan(scan);
    List<InputSplit> splits = super.getSplits(context);
    allSplits.addAll(splits);
  }
  // Restore the original scan.
  setScan(originalScan);
  return allSplits;
}
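The rowKeyDistributor here follows the salted-row-key pattern (as in the HBaseWD library): one logical scan is fanned out into one physical scan per salt prefix, so rows salted for write distribution can still be scanned in full. A minimal self-contained sketch of the idea, assuming the HBase 2.x client API and a hypothetical one-byte salt distributor (this is not the HBaseWD API):

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.util.Bytes;

// Hypothetical distributor: rows were written as [salt][originalKey] with
// salt in 0..bucketCount-1 (bucketCount < 256), so one logical scan is
// fanned out into bucketCount physical scans, one per salt prefix.
public class OneByteSaltDistributor {

  private final int bucketCount;

  public OneByteSaltDistributor(int bucketCount) {
    this.bucketCount = bucketCount;
  }

  public Scan[] getDistributedScans(Scan original) throws IOException {
    List<Scan> scans = new ArrayList<>(bucketCount);
    for (int salt = 0; salt < bucketCount; salt++) {
      Scan scan = new Scan(original); // copies families, filters, caching, ...
      scan.withStartRow(prefix(salt, original.getStartRow()));
      byte[] stop = original.getStopRow();
      // An empty stop row means "to the end of the table"; inside a bucket
      // that is everything up to the next salt prefix.
      scan.withStopRow(stop.length == 0
          ? new byte[] { (byte) (salt + 1) }
          : prefix(salt, stop));
      scans.add(scan);
    }
    return scans.toArray(new Scan[0]);
  }

  private static byte[] prefix(int salt, byte[] key) {
    return Bytes.add(new byte[] { (byte) salt }, key);
  }
}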
/** {@inheritDoc} */
@Override
public List<InputSplit> getSplits(JobContext context) throws IOException {
  List<InputSplit> splits = super.getSplits(context);
  // TableInputFormat opens an HTable within its setConf() method, since it is
  // required during getSplits().
  HTableInterface openedTable = getHTable();
  assert null != openedTable;
  // After getSplits(), it is no longer necessary, so let's close it.
  openedTable.close();
  setHTable(null);
  return splits;
}
@Override
public List<InputSplit> getSplits(org.apache.hadoop.mapreduce.JobContext context)
    throws IOException {
  List<InputSplit> splits = super.getSplits(context);
  ListIterator<InputSplit> splitIter = splits.listIterator();
  while (splitIter.hasNext()) {
    TableSplit split = (TableSplit) splitIter.next();
    byte[] startKey = split.getStartRow();
    byte[] endKey = split.getEndRow();
    // Skip if the region doesn't satisfy the configured options.
    if ((skipRegion(CompareOp.LESS, startKey, lt_))
        || (skipRegion(CompareOp.GREATER, endKey, gt_))
        || (skipRegion(CompareOp.GREATER, endKey, gte_))
        || (skipRegion(CompareOp.LESS_OR_EQUAL, startKey, lte_))) {
      splitIter.remove();
    }
  }
  return splits;
}
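This snippet relies on a skipRegion(...) helper that is not shown. A hypothetical reconstruction, consistent with how the call sites pair each operator with a split boundary key and a configured bound (lt_, gt_, gte_, lte_): skip the split when its boundary key fails the comparison. The source project's real helper may differ.

import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp;
import org.apache.hadoop.hbase.util.Bytes;

// Hypothetical helper: returns true when the whole region can be skipped,
// i.e. when its boundary key does NOT satisfy "key op bound".
// 'bound' is null when the corresponding option is unset.
private boolean skipRegion(CompareOp op, byte[] key, byte[] bound) {
  if (bound == null || key.length == 0) {
    // No bound configured, or the key is the open table boundary.
    return false;
  }
  int cmp = Bytes.compareTo(key, bound);
  switch (op) {
    case LESS:             return !(cmp < 0);
    case LESS_OR_EQUAL:    return !(cmp <= 0);
    case GREATER:          return !(cmp > 0);
    case GREATER_OR_EQUAL: return !(cmp >= 0);
    default:               return false;
  }
}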
@Override
public List<InputSplit> getSplits(JobContext context) throws IOException {
  if (isMock()) {
    if (table == null) {
      initialize(context);
    }
    // In mock mode, return a single split covering the whole key space.
    List<InputSplit> splits = new ArrayList<>(1);
    TableSplit split = new TableSplit(getTable().getName(), getScan(),
      HConstants.EMPTY_BYTE_ARRAY, HConstants.EMPTY_BYTE_ARRAY, "", 0);
    splits.add(split);
    return splits;
  } else {
    return super.getSplits(context);
  }
}
List<TableSplit> splits = tableInputFormat.getSplits(context).stream()
  .map(x -> (TableSplit) x)
  .collect(Collectors.toList());
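Once down-cast like this, each TableSplit exposes its region metadata. A small usage sketch (the output format is arbitrary):

import org.apache.hadoop.hbase.mapreduce.TableSplit;
import org.apache.hadoop.hbase.util.Bytes;

for (TableSplit split : splits) {
  // Print each split's key range and the region server hosting it.
  System.out.printf("[%s, %s) on %s%n",
      Bytes.toStringBinary(split.getStartRow()),
      Bytes.toStringBinary(split.getEndRow()),
      split.getRegionLocation());
}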