@Override
public List<InputSplit> getSplits(final JobContext jobContext)
    throws IOException, InterruptedException {
  return this.tableInputFormat.getSplits(jobContext);
}
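This delegating override is the common wrapper pattern: an outer InputFormat holds a configured TableInputFormat and forwards split computation to it. A minimal self-contained sketch of that pattern, with hypothetical class and field names (not any particular project's API):

import java.io.IOException;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableInputFormat;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

// Hypothetical wrapper: owns a TableInputFormat and forwards both
// split computation and record reading to it.
public class DelegatingTableInputFormat
    extends InputFormat<ImmutableBytesWritable, Result> {

  private final TableInputFormat tableInputFormat = new TableInputFormat();

  // TableInputFormat reads its table name and scan from the job
  // configuration, so the wrapper must pass the configuration along.
  public void setConf(Configuration conf) {
    tableInputFormat.setConf(conf);
  }

  @Override
  public List<InputSplit> getSplits(JobContext jobContext)
      throws IOException, InterruptedException {
    return tableInputFormat.getSplits(jobContext);
  }

  @Override
  public RecordReader<ImmutableBytesWritable, Result> createRecordReader(
      InputSplit split, TaskAttemptContext context)
      throws IOException, InterruptedException {
    return tableInputFormat.createRecordReader(split, context);
  }
}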
@Test
public void testNonSuccessiveSplitsAreNotMerged() throws IOException {
  JobContext context = mock(JobContext.class);
  Configuration conf = HBaseConfiguration.create();
  conf.set(ClusterConnection.HBASE_CLIENT_CONNECTION_IMPL,
    ConnectionForMergeTesting.class.getName());
  conf.set(TableInputFormat.INPUT_TABLE, "testTable");
  conf.setBoolean(TableInputFormatBase.MAPREDUCE_INPUT_AUTOBALANCE, true);
  when(context.getConfiguration()).thenReturn(conf);

  TableInputFormat tifExclude = new TableInputFormatForMergeTesting();
  tifExclude.setConf(conf);
  // split["b", "c"] is excluded, split["o", "p"] and split["p", "q"] are merged,
  // but split["a", "b"] and split["c", "d"] are not merged.
  assertEquals(ConnectionForMergeTesting.START_KEYS.length - 1 - 1,
    tifExclude.getSplits(context).size());
}
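Outside of tests, the autobalance switch exercised above is a plain boolean in the job configuration. A minimal driver-side sketch (the table name is a placeholder):

Configuration conf = HBaseConfiguration.create();
conf.set(TableInputFormat.INPUT_TABLE, "myTable"); // placeholder table name
// Let TableInputFormatBase merge undersized adjacent splits (and cut
// oversized ones) when computing input splits.
conf.setBoolean(TableInputFormatBase.MAPREDUCE_INPUT_AUTOBALANCE, true);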
/**
 * Tests the number of InputSplits for an MR job when a number of mappers
 * per region is specified for TableInputFormatXXX.
 * This test does not run the MR job.
 */
protected void testNumOfSplits(int splitsPerRegion, int expectedNumOfSplits)
    throws IOException, InterruptedException, ClassNotFoundException {
  String jobName = "TestJobForNumOfSplits";
  LOG.info("Before map/reduce startup - job " + jobName);
  Configuration c = new Configuration(TEST_UTIL.getConfiguration());
  Scan scan = new Scan();
  scan.addFamily(INPUT_FAMILYS[0]);
  scan.addFamily(INPUT_FAMILYS[1]);
  c.setInt("hbase.mapreduce.tableinput.mappers.per.region", splitsPerRegion);
  c.set(KEY_STARTROW, "");
  c.set(KEY_LASTROW, "");
  Job job = Job.getInstance(c, jobName);
  TableMapReduceUtil.initTableMapperJob(TABLE_NAME.getNameAsString(), scan,
    ScanMapper.class, ImmutableBytesWritable.class, ImmutableBytesWritable.class, job);
  TableInputFormat tif = new TableInputFormat();
  tif.setConf(job.getConfiguration());
  Assert.assertEquals(TABLE_NAME, table.getName());
  List<InputSplit> splits = tif.getSplits(job);
  Assert.assertEquals(expectedNumOfSplits, splits.size());
}
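Driver-side, the same property is the knob that controls how many splits (and hence mappers) each region yields. A minimal sketch, assuming only that the job reads hbase.mapreduce.tableinput.mappers.per.region the way this test does:

Configuration conf = HBaseConfiguration.create();
// Ask TableInputFormat to generate two input splits (two mappers) per region.
conf.setInt("hbase.mapreduce.tableinput.mappers.per.region", 2);
Job job = Job.getInstance(conf, "scan-with-two-mappers-per-region");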
@Override
public List<InputSplit> getSplits(JobContext jobContext) throws IOException {
  // Hadoop.JobContext.getConfiguration appears to be a reflective accessor,
  // presumably for compatibility across Hadoop versions where JobContext's
  // type changed.
  Configuration conf = Hadoop.JobContext.getConfiguration.invoke(jobContext);
  return getDelegate(conf).getSplits(jobContext);
}
@Override
public org.apache.hadoop.mapred.InputSplit[] getSplits(JobConf job, int numSplits)
    throws IOException {
  inputFormat.setConf(job);
  // Bridge the old mapred API to the new mapreduce API: wrap the JobConf in
  // a JobContext, compute the new-style splits, then convert them back.
  return convertSplits(inputFormat.getSplits(
    HCatMapRedUtil.createJobContext(job, null, Reporter.NULL)));
}
@Override
public List<InputSplit> getSplits(JobContext context) throws IOException {
  List<InputSplit> allSplits = new ArrayList<>();
  Scan originalScan = getScan();
  Scan[] scans = rowKeyDistributor.getDistributedScans(originalScan);
  for (Scan scan : scans) {
    // Internally super.getSplits(...) uses the scan object stored in a
    // private field; to reuse the superclass code we temporarily swap in
    // each distributed scan.
    setScan(scan);
    List<InputSplit> splits = super.getSplits(context);
    allSplits.addAll(splits);
  }
  // Restore the original scan.
  setScan(originalScan);
  return allSplits;
}
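The rowKeyDistributor here follows the salted-row-key pattern (as in the HBaseWD library): one logical scan is fanned out into one physical scan per salt prefix, so rows salted for write distribution can still be scanned in full. A minimal self-contained sketch of the idea, assuming the HBase 2.x client API and a hypothetical one-byte salt distributor (this is not the HBaseWD API):

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.util.Bytes;

// Hypothetical distributor: rows were written as [salt][originalKey] with
// salt in 0..bucketCount-1 (bucketCount < 256), so one logical scan is
// fanned out into bucketCount physical scans, one per salt prefix.
public class OneByteSaltDistributor {

  private final int bucketCount;

  public OneByteSaltDistributor(int bucketCount) {
    this.bucketCount = bucketCount;
  }

  public Scan[] getDistributedScans(Scan original) throws IOException {
    List<Scan> scans = new ArrayList<>(bucketCount);
    for (int salt = 0; salt < bucketCount; salt++) {
      Scan scan = new Scan(original); // copies families, filters, caching, ...
      scan.withStartRow(prefix(salt, original.getStartRow()));
      byte[] stop = original.getStopRow();
      // An empty stop row means "to the end of the table"; inside a bucket
      // that is everything up to the next salt prefix.
      scan.withStopRow(stop.length == 0
          ? new byte[] { (byte) (salt + 1) }
          : prefix(salt, stop));
      scans.add(scan);
    }
    return scans.toArray(new Scan[0]);
  }

  private static byte[] prefix(int salt, byte[] key) {
    return Bytes.add(new byte[] { (byte) salt }, key);
  }
}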
/** {@inheritDoc} */
@Override
public List<InputSplit> getSplits(JobContext context) throws IOException {
  List<InputSplit> splits = super.getSplits(context);
  // TableInputFormat opens an HTable within its setConf() method, since it is
  // required during getSplits().
  HTableInterface openedTable = getHTable();
  assert null != openedTable;
  // After getSplits(), it is no longer necessary, so let's close it.
  openedTable.close();
  setHTable(null);
  return splits;
}
@Override
public List<InputSplit> getSplits(org.apache.hadoop.mapreduce.JobContext context)
    throws IOException {
  List<InputSplit> splits = super.getSplits(context);
  ListIterator<InputSplit> splitIter = splits.listIterator();
  while (splitIter.hasNext()) {
    TableSplit split = (TableSplit) splitIter.next();
    byte[] startKey = split.getStartRow();
    byte[] endKey = split.getEndRow();
    // Skip if the region doesn't satisfy the configured options.
    if ((skipRegion(CompareOp.LESS, startKey, lt_))
        || (skipRegion(CompareOp.GREATER, endKey, gt_))
        || (skipRegion(CompareOp.GREATER, endKey, gte_))
        || (skipRegion(CompareOp.LESS_OR_EQUAL, startKey, lte_))) {
      splitIter.remove();
    }
  }
  return splits;
}
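This snippet relies on a skipRegion(...) helper that is not shown. A hypothetical reconstruction, consistent with how the call sites pair each operator with a split boundary key and a configured bound (lt_, gt_, gte_, lte_): skip the split when its boundary key fails the comparison. The source project's real helper may differ.

import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp;
import org.apache.hadoop.hbase.util.Bytes;

// Hypothetical helper: returns true when the whole region can be skipped,
// i.e. when its boundary key does NOT satisfy "key op bound".
// 'bound' is null when the corresponding option is unset.
private boolean skipRegion(CompareOp op, byte[] key, byte[] bound) {
  if (bound == null || key.length == 0) {
    // No bound configured, or the key is the open table boundary.
    return false;
  }
  int cmp = Bytes.compareTo(key, bound);
  switch (op) {
    case LESS:             return !(cmp < 0);
    case LESS_OR_EQUAL:    return !(cmp <= 0);
    case GREATER:          return !(cmp > 0);
    case GREATER_OR_EQUAL: return !(cmp >= 0);
    default:               return false;
  }
}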
@Override
public List<InputSplit> getSplits(JobContext context) throws IOException {
  if (isMock()) {
    if (table == null) {
      initialize(context);
    }
    // In mock mode, return a single split covering the whole key space.
    List<InputSplit> splits = new ArrayList<>(1);
    TableSplit split = new TableSplit(getTable().getName(), getScan(),
      HConstants.EMPTY_BYTE_ARRAY, HConstants.EMPTY_BYTE_ARRAY, "", 0);
    splits.add(split);
    return splits;
  } else {
    return super.getSplits(context);
  }
}
List<TableSplit> splits = tableInputFormat.getSplits(context).stream()
  .map(x -> (TableSplit) x)
  .collect(Collectors.toList());
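Once down-cast like this, each TableSplit exposes its region metadata. A small usage sketch (the output format is arbitrary):

import org.apache.hadoop.hbase.mapreduce.TableSplit;
import org.apache.hadoop.hbase.util.Bytes;

for (TableSplit split : splits) {
  // Print each split's key range and the region server hosting it.
  System.out.printf("[%s, %s) on %s%n",
      Bytes.toStringBinary(split.getStartRow()),
      Bytes.toStringBinary(split.getEndRow()),
      split.getRegionLocation());
}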