this.tableInputFormat.setConf(config);
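On its own, the call above is just a delegation; it typically lives inside a wrapper input format that owns a TableInputFormat instance and forwards the job configuration to it. A minimal sketch of such a wrapper, assuming only the HBase MapReduce API (the class and field names here are illustrative, not from the original source):

import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.mapreduce.TableInputFormat;

// Illustrative wrapper: owns a TableInputFormat and passes the job configuration through,
// so the delegate can later resolve the table name, scan, and other settings from it.
public class DelegatingTableInputFormat implements Configurable {

  private final TableInputFormat tableInputFormat = new TableInputFormat();
  private Configuration config;

  @Override
  public void setConf(Configuration config) {
    this.config = config;
    this.tableInputFormat.setConf(config);
  }

  @Override
  public Configuration getConf() {
    return config;
  }
}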
@Test
public void testNonSuccessiveSplitsAreNotMerged() throws IOException {
  JobContext context = mock(JobContext.class);
  Configuration conf = HBaseConfiguration.create();
  conf.set(ClusterConnection.HBASE_CLIENT_CONNECTION_IMPL,
    ConnectionForMergeTesting.class.getName());
  conf.set(TableInputFormat.INPUT_TABLE, "testTable");
  conf.setBoolean(TableInputFormatBase.MAPREDUCE_INPUT_AUTOBALANCE, true);
  when(context.getConfiguration()).thenReturn(conf);

  TableInputFormat tifExclude = new TableInputFormatForMergeTesting();
  tifExclude.setConf(conf);
  // split["b", "c"] is excluded, split["o", "p"] and split["p", "q"] are merged,
  // but split["a", "b"] and split["c", "d"] are not merged.
  assertEquals(ConnectionForMergeTesting.START_KEYS.length - 1 - 1,
    tifExclude.getSplits(context).size());
}
/**
 * Tests the number of input splits for an MR job when a number of mappers per region is
 * specified for TableInputFormatXXX. This test does not run the MR job.
 */
protected void testNumOfSplits(int splitsPerRegion, int expectedNumOfSplits)
    throws IOException, InterruptedException, ClassNotFoundException {
  String jobName = "TestJobForNumOfSplits";
  LOG.info("Before map/reduce startup - job " + jobName);
  Configuration c = new Configuration(TEST_UTIL.getConfiguration());
  Scan scan = new Scan();
  scan.addFamily(INPUT_FAMILYS[0]);
  scan.addFamily(INPUT_FAMILYS[1]);
  c.setInt("hbase.mapreduce.tableinput.mappers.per.region", splitsPerRegion);
  c.set(KEY_STARTROW, "");
  c.set(KEY_LASTROW, "");
  Job job = Job.getInstance(c, jobName);
  TableMapReduceUtil.initTableMapperJob(TABLE_NAME.getNameAsString(), scan, ScanMapper.class,
    ImmutableBytesWritable.class, ImmutableBytesWritable.class, job);
  TableInputFormat tif = new TableInputFormat();
  tif.setConf(job.getConfiguration());
  Assert.assertEquals(TABLE_NAME, table.getName());
  List<InputSplit> splits = tif.getSplits(job);
  Assert.assertEquals(expectedNumOfSplits, splits.size());
}
@Override
public void setConf(Configuration conf) {
  super.setConf(conf);
  if (conf.get(ROW_KEY_DISTRIBUTOR_CLASS) != null) {
    String clazz = conf.get(ROW_KEY_DISTRIBUTOR_CLASS);
    try {
      rowKeyDistributor = (AbstractRowKeyDistributor) Class.forName(clazz).newInstance();
      if (conf.get(ROW_KEY_DISTRIBUTOR_PARAMS) != null) {
        rowKeyDistributor.init(conf.get(ROW_KEY_DISTRIBUTOR_PARAMS));
      }
    } catch (Exception e) {
      throw new RuntimeException("Cannot create row key distributor, "
        + ROW_KEY_DISTRIBUTOR_CLASS + ": " + clazz, e);
    }
  }
}
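For context, the distributor above is normally wired in through the job configuration before this input format is used. A hedged sketch of that wiring, assuming an HBaseWD-style WdTableInputFormat as the declaring class (both the class name and the distributor class string are placeholders, not taken from the snippet):

// Hedged job setup: WdTableInputFormat stands in for the class declaring the setConf(...)
// override above; "com.example.MyRowKeyDistributor" is a placeholder for a concrete
// AbstractRowKeyDistributor implementation.
Configuration conf = HBaseConfiguration.create();
conf.set(TableInputFormat.INPUT_TABLE, "exampleTable");
conf.set(WdTableInputFormat.ROW_KEY_DISTRIBUTOR_CLASS, "com.example.MyRowKeyDistributor");
conf.set(WdTableInputFormat.ROW_KEY_DISTRIBUTOR_PARAMS, "8"); // distributor-specific init string
Job job = Job.getInstance(conf, "scan-with-distributed-row-keys");
job.setInputFormatClass(WdTableInputFormat.class);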
@Override
public void setConf(final Configuration config) {
  this.graph = new FaunusTitanHBaseGraph(
    GraphFactory.generateTitanConfiguration(config, FAUNUS_GRAPH_INPUT_TITAN));
  this.vertexQuery = VertexQueryFilter.create(config);
  this.pathEnabled = config.getBoolean(FaunusCompiler.PATH_ENABLED, false);

  //config.set(TableInputFormat.SCAN_COLUMN_FAMILY, Backend.EDGESTORE_NAME);
  config.set(TableInputFormat.INPUT_TABLE,
    config.get(FAUNUS_GRAPH_INPUT_TITAN_STORAGE_TABLENAME));
  config.set(HConstants.ZOOKEEPER_QUORUM,
    config.get(FAUNUS_GRAPH_INPUT_TITAN_STORAGE_HOSTNAME));
  if (config.get(FAUNUS_GRAPH_INPUT_TITAN_STORAGE_PORT, null) != null)
    config.set(HConstants.ZOOKEEPER_CLIENT_PORT,
      config.get(FAUNUS_GRAPH_INPUT_TITAN_STORAGE_PORT));
  config.set("storage.read-only", "true");
  config.set("autotype", "none");

  Scan scanner = new Scan();
  scanner.addFamily(Backend.EDGESTORE_NAME.getBytes());
  scanner.setFilter(getColumnFilter(this.vertexQuery));
  // TODO (minor): should we set other options in
  // http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/Scan.html for optimization?

  Method converter;
  try {
    converter = TableMapReduceUtil.class.getDeclaredMethod("convertScanToString", Scan.class);
    converter.setAccessible(true);
    config.set(TableInputFormat.SCAN, (String) converter.invoke(null, scanner));
  } catch (Exception e) {
    throw new RuntimeException(e);
  }

  this.tableInputFormat.setConf(config);
}
@Override
public org.apache.hadoop.mapred.InputSplit[] getSplits(JobConf job, int numSplits)
    throws IOException {
  inputFormat.setConf(job);
  return convertSplits(inputFormat.getSplits(
    HCatMapRedUtil.createJobContext(job, null, Reporter.NULL)));
}
private TableInputFormat getDelegate(Configuration conf) throws IOException {
  TableInputFormat delegate = new TableInputFormat();
  String tableName = HBaseMetadataProvider.getTableName(dataset.getName());
  conf.set(TableInputFormat.INPUT_TABLE, tableName);
  if (view != null) {
    Job tempJob = new Job();
    Scan scan = ((BaseEntityScanner) view.newEntityScanner()).getScan();
    TableMapReduceUtil.initTableMapperJob(tableName, scan, TableMapper.class, null, null,
      tempJob);
    Configuration tempConf = Hadoop.JobContext.getConfiguration.invoke(tempJob);
    conf.set(SCAN, tempConf.get(SCAN));
  }
  delegate.setConf(conf);
  return delegate;
}
tableInputFormat.setConf(connection.getConfiguration());
tableInputFormat.setScan(scan);
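Standing alone, these two calls assume a Connection and a Scan already exist. A self-contained sketch of that setup, assuming nothing beyond the HBase client and MapReduce APIs (the table name and column family are made up for illustration):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.mapreduce.TableInputFormat;
import org.apache.hadoop.hbase.util.Bytes;

public class TableInputFormatSetup {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    conf.set(TableInputFormat.INPUT_TABLE, "exampleTable"); // illustrative table name
    try (Connection connection = ConnectionFactory.createConnection(conf)) {
      Scan scan = new Scan();
      scan.addFamily(Bytes.toBytes("cf")); // illustrative column family
      TableInputFormat tableInputFormat = new TableInputFormat();
      // Same pattern as the fragment above: configuration first, then the scan object.
      tableInputFormat.setConf(connection.getConfiguration());
      tableInputFormat.setScan(scan);
    }
  }
}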
@Override
public RecordReader<ImmutableBytesWritable, ResultWritable> getRecordReader(
    InputSplit split, JobConf job, Reporter reporter) throws IOException {
  String jobString = job.get(HCatConstants.HCAT_KEY_JOB_INFO);
  InputJobInfo inputJobInfo = (InputJobInfo) HCatUtil.deserialize(jobString);
  String tableName = job.get(TableInputFormat.INPUT_TABLE);
  TableSplit tSplit = (TableSplit) split;
  HbaseSnapshotRecordReader recordReader = new HbaseSnapshotRecordReader(inputJobInfo, job);
  inputFormat.setConf(job);
  Scan inputScan = inputFormat.getScan();
  // TODO: Make the caching configurable by the user
  inputScan.setCaching(200);
  inputScan.setCacheBlocks(false);
  Scan sc = new Scan(inputScan);
  sc.setStartRow(tSplit.getStartRow());
  sc.setStopRow(tSplit.getEndRow());
  recordReader.setScan(sc);
  recordReader.setHTable(new HTable(job, tableName));
  recordReader.init();
  return recordReader;
}
tableInputFormat = new TableInputFormat();
tableInputFormat.setConf(configuration);
inputFormat = tableInputFormat;
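For the assignment above to yield usable splits, the Configuration handed to setConf(...) normally carries at least the table name and, optionally, scan bounds expressed as plain config keys. A hedged fragment showing that preparation with TableInputFormat's public key constants (all key values are illustrative):

Configuration configuration = HBaseConfiguration.create();
configuration.set(TableInputFormat.INPUT_TABLE, "exampleTable");  // required: table to read
configuration.set(TableInputFormat.SCAN_COLUMN_FAMILY, "cf");     // optional: restrict to one family
configuration.set(TableInputFormat.SCAN_ROW_START, "row-000");    // optional: scan lower bound
configuration.set(TableInputFormat.SCAN_ROW_STOP, "row-999");     // optional: scan upper bound

TableInputFormat tableInputFormat = new TableInputFormat();
tableInputFormat.setConf(configuration);
InputFormat<ImmutableBytesWritable, Result> inputFormat = tableInputFormat;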