/**
 * Sets up the job for reading from a table snapshot. It bypasses HBase servers and reads
 * directly from snapshot files.
 * @param snapshotName The name of the snapshot (of a table) to read from.
 * @param scan The scan instance with the columns, time range, etc.
 * @param mapper The mapper class to use.
 * @param outputKeyClass The class of the output key.
 * @param outputValueClass The class of the output value.
 * @param job The current job to adjust. Make sure the passed job is carrying all necessary
 *          HBase configuration.
 * @param addDependencyJars upload HBase jars and jars for any of the configured job classes
 *          via the distributed cache (tmpjars).
 * @param tmpRestoreDir a temporary directory to copy the snapshot files into. The current
 *          user should have write permissions to this directory, and it should not be a
 *          subdirectory of rootdir. The restore directory can be deleted after the job is
 *          finished.
 * @throws IOException When setting up the details fails.
 * @see TableSnapshotInputFormat
 */
public static void initTableSnapshotMapperJob(String snapshotName, Scan scan,
    Class<? extends TableMapper> mapper, Class<?> outputKeyClass, Class<?> outputValueClass,
    Job job, boolean addDependencyJars, Path tmpRestoreDir) throws IOException {
  TableSnapshotInputFormat.setInput(job, snapshotName, tmpRestoreDir);
  initTableMapperJob(snapshotName, scan, mapper, outputKeyClass, outputValueClass, job,
    addDependencyJars, false, TableSnapshotInputFormat.class);
  resetCacheConfig(job.getConfiguration());
}
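A typical invocation might look like the following sketch; the snapshot name, restore path, and MySnapshotMapper are hypothetical placeholders, not part of the snippet above.

Job job = Job.getInstance(HBaseConfiguration.create(), "snapshot-scan");
Scan scan = new Scan();
scan.addFamily(Bytes.toBytes("cf"));                  // hypothetical column family
TableMapReduceUtil.initTableSnapshotMapperJob(
    "sales_snapshot",                                 // hypothetical snapshot name
    scan,
    MySnapshotMapper.class,                           // hypothetical TableMapper subclass
    ImmutableBytesWritable.class,                     // output key class
    Result.class,                                     // output value class
    job,
    true,                                             // ship dependency jars via tmpjars
    new Path("/tmp/snapshot-restore"));               // must not live under the HBase rootdir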
private void verifyWithMockedMapReduce(Job job, int numRegions, int expectedNumSplits,
    byte[] startRow, byte[] stopRow) throws IOException, InterruptedException {
  TableSnapshotInputFormat tsif = new TableSnapshotInputFormat();
  List<InputSplit> splits = tsif.getSplits(job);
  // Mock the task context so the record reader sees the job's configuration.
  TaskAttemptContext taskAttemptContext = mock(TaskAttemptContext.class);
  when(taskAttemptContext.getConfiguration()).thenReturn(job.getConfiguration());
  for (InputSplit split : splits) {
    RecordReader<ImmutableBytesWritable, Result> rr =
      tsif.createRecordReader(split, taskAttemptContext);
    rr.initialize(split, taskAttemptContext);
  }
}
@Override
public List<InputSplit> getSplits(final JobContext jobContext)
    throws IOException, InterruptedException {
  return this.tableSnapshotInputFormat.getSplits(jobContext);
}
@Override
public RecordReader<StaticBuffer, Iterable<Entry>> createRecordReader(final InputSplit inputSplit,
    final TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
  // Wrap HBase's snapshot record reader so rows come back as JanusGraph
  // StaticBuffer/Entry pairs, restricted to the edge store column family.
  tableReader = tableSnapshotInputFormat.createRecordReader(inputSplit, taskAttemptContext);
  janusgraphRecordReader = new HBaseBinaryRecordReader(tableReader, edgeStoreFamily);
  return janusgraphRecordReader;
}
// Overload of the method above that also takes a split algorithm and a per-region split count.
public static void initTableSnapshotMapperJob(String snapshotName, Scan scan,
    Class<? extends TableMapper> mapper, Class<?> outputKeyClass, Class<?> outputValueClass,
    Job job, boolean addDependencyJars, Path tmpRestoreDir,
    RegionSplitter.SplitAlgorithm splitAlgo, int numSplitsPerRegion) throws IOException {
  TableSnapshotInputFormat.setInput(job, snapshotName, tmpRestoreDir, splitAlgo,
    numSplitsPerRegion);
  initTableMapperJob(snapshotName, scan, mapper, outputKeyClass, outputValueClass, job,
    addDependencyJars, false, TableSnapshotInputFormat.class);
  resetCacheConfig(job.getConfiguration());
}
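The split-aware overload can be driven the same way; in this hedged sketch, RegionSplitter.UniformSplit (a stock HBase split algorithm) fans each region out into four input splits, and the names are again placeholders.

TableMapReduceUtil.initTableSnapshotMapperJob(
    "sales_snapshot", scan, MySnapshotMapper.class,
    ImmutableBytesWritable.class, Result.class, job,
    true, new Path("/tmp/snapshot-restore"),
    new RegionSplitter.UniformSplit(),                // split the key space into equal byte ranges
    4);                                               // hypothetical splits per region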
TableSnapshotInputFormat.setInput(job, snapshotName, restoreDir);
// setInput records the snapshot name and restore directory in the job's
// configuration; mirror both values into the caller-supplied config.
config.set(SNAPSHOT_NAME_KEY, job.getConfiguration().get(SNAPSHOT_NAME_KEY));
config.set(RESTORE_DIR_KEY, job.getConfiguration().get(RESTORE_DIR_KEY));
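On the read side, the propagated settings can be recovered from the task's configuration; a minimal sketch using the same constants, assuming a TaskAttemptContext is in scope:

Configuration conf = taskAttemptContext.getConfiguration();
String snapshotName = conf.get(SNAPSHOT_NAME_KEY);
Path restoreDir = new Path(conf.get(RESTORE_DIR_KEY));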