/**
 * Configures {@code job} for an incremental (bulk) load into {@code table}.
 * Concretely, this:
 * <ul>
 * <li>Inspects the table and sets up a total order partitioner over its regions</li>
 * <li>Uploads the partitions file to the cluster and registers it in the DistributedCache</li>
 * <li>Sets the reduce task count to the table's current region count</li>
 * <li>Sets the output key/value classes required by HFileOutputFormat2</li>
 * <li>Installs the appropriate sorting reducer (KeyValueSortReducer or PutSortReducer)</li>
 * </ul>
 * Callers must set the map output value class to either KeyValue or Put before invoking
 * this method.
 *
 * @param job the MapReduce job to configure
 * @param table the target table; its descriptor and region locator drive the configuration
 * @throws IOException if table metadata cannot be read
 * @deprecated Use {@link #configureIncrementalLoad(Job, Table, RegionLocator)} instead.
 */
@Deprecated
public static void configureIncrementalLoad(Job job, HTable table) throws IOException {
  // Delegate to the descriptor/locator overload that replaces this deprecated entry point.
  configureIncrementalLoad(job, table.getTableDescriptor(), table.getRegionLocator());
}
// NOTE(review): fragment of a larger method — the if-block is never closed here and the
// loop index `i` is declared outside this view. Code left byte-identical; comments only.
final Pair<byte[][], byte[][]> keys = table.getRegionLocator().getStartEndKeys();
// Guard: work is partitioned per region, so an empty/unresolvable region list is fatal.
if (keys == null || keys.getFirst() == null || keys.getFirst().length == 0) {
  throw new IOException("Expecting at least one region.");
final byte[] startKey = keys.getFirst()[i];
final byte[] endKey = keys.getSecond()[i];
// false = allow the cached region location; presumably locality info for split placement — confirm.
final String regionLocation = table.getRegionLocator().getRegionLocation(startKey, false).getHostnamePort();
// NOTE(review): fragment — the enclosing do/while loop (and its termination condition)
// is not visible here. Code left byte-identical; comments only.
byte[] currentKey = startKey;
do {
  // `reload` presumably forces a fresh meta lookup instead of the location cache — TODO confirm.
  HRegionLocation regionLocation = getRegionLocator().getRegionLocation(currentKey, reload);
  keysInRange.add(currentKey);
  regionsInRange.add(regionLocation);
// Configure the job for bulk (incremental) load against htable's current region layout.
HFileOutputFormat2.configureIncrementalLoad(job, htable, htable.getRegionLocator());
/**
 * Builds an {@link HRegionFileSystem} view over the single region of {@code table}.
 * Asserts the expected on-disk layout for this test: exactly one region directory
 * containing exactly two column-family directories.
 *
 * @param table the test table (expected to have exactly one region)
 * @param conf configuration used to construct the region filesystem
 * @return a region filesystem rooted at the table's lone region
 * @throws IOException if the DFS layout cannot be inspected
 */
private HRegionFileSystem getHRegionFS(HTable table, Configuration conf) throws IOException {
  final FileSystem dfs = TEST_UTIL.getDFSCluster().getFileSystem();
  final Path tableDir = FSUtils.getTableDir(TEST_UTIL.getDefaultRootDirPath(), table.getName());
  final List<Path> regionDirs = FSUtils.getRegionDirs(dfs, tableDir);
  assertEquals(1, regionDirs.size());
  final List<Path> familyDirs = FSUtils.getFamilyDirs(dfs, regionDirs.get(0));
  assertEquals(2, familyDirs.size());
  // Single region assumed above, so the first location is the only one.
  final RegionInfo regionInfo =
      table.getRegionLocator().getAllRegionLocations().get(0).getRegionInfo();
  return new HRegionFileSystem(conf, new HFileSystem(dfs), tableDir, regionInfo);
}
// NOTE(review): fragment — the first statement's result is discarded, which suggests the
// assignment target (`region`) was truncated from this view; the for-loop body is also
// missing. Code left byte-identical; comments only.
table.getRegionLocator().getRegionLocation(row).getRegion();
ServerName currentServer = cluster.getServerHoldingRegion(tn, region.getRegionName());
for (RegionServerThread rst : cluster.getLiveRegionServerThreads()) {
// NOTE(review): fragment — the enclosing do/while loop (and its termination condition)
// is not visible here. Code left byte-identical; comments only.
byte[] currentKey = startKey;
do {
  // `reload` presumably forces a fresh meta lookup instead of the location cache — TODO confirm.
  HRegionLocation regionLocation = getRegionLocator().getRegionLocation(currentKey, reload);
  keysInRange.add(currentKey);
  regionsInRange.add(regionLocation);
/**
 * Sets up {@code job} so its output can be bulk-loaded into {@code table}.
 * The method:
 * <ul>
 * <li>Reads the table's region boundaries to configure a total order partitioner</li>
 * <li>Ships the partitions file to the cluster via the DistributedCache</li>
 * <li>Matches the number of reducers to the number of regions</li>
 * <li>Sets output key/value classes as HFileOutputFormat2 requires</li>
 * <li>Picks the correct sorting reducer (KeyValueSortReducer or PutSortReducer)</li>
 * </ul>
 * The map output value class must be set to KeyValue or Put by the caller first.
 *
 * @param job job to configure for incremental load
 * @param table table whose descriptor and region locator are consulted
 * @throws IOException if table metadata cannot be read
 * @deprecated Use {@link #configureIncrementalLoad(Job, Table, RegionLocator)} instead.
 */
@Deprecated
public static void configureIncrementalLoad(Job job, HTable table) throws IOException {
  // Forward to the non-deprecated overload.
  configureIncrementalLoad(job, table.getTableDescriptor(), table.getRegionLocator());
}
/**
 * Processes a List of Rows (Put, Delete) and writes them to an HTable instance in
 * RegionServer buckets via the htable.batch method.
 * <br><br>
 * The benefit of submitting Puts in this manner is to minimize the number of RegionServer
 * RPCs: this produces one RPC of Puts per RegionServer.
 * <br><br>
 * Assumption #1: Regions have been pre-created for the table. If they haven't, all of the
 * Puts will go to the same region, defeating the purpose of this utility method. See the
 * Apache HBase book for an explanation of how to do this.
 * <br>
 * Assumption #2: Row-keys are not monotonically increasing. See the Apache HBase book for
 * an explanation of this problem.
 * <br>
 * Assumption #3: The input list of Rows is big enough to be useful (in the thousands or
 * more). The intent of this method is to process larger chunks of data.
 * <br><br>
 * This method accepts a list of Row objects because the underlying .batch method accepts
 * a list of Row objects.
 * <br><br>
 * @param htable HTable instance for target HBase table
 * @param rows List of Row instances
 * @throws IOException if a remote or network exception occurs, or as an
 *         {@link InterruptedIOException} if the batch is interrupted
 */
public static void bucketRsBatch(HTable htable, List<Row> rows) throws IOException {
  try {
    // Group rows by hosting RegionServer so each server receives a single batch RPC.
    Map<String, List<Row>> rowMap = createRsRowMap(htable.getRegionLocator(), rows);
    for (List<Row> rsRows : rowMap.values()) {
      htable.batch(rsRows);
    }
  } catch (InterruptedException e) {
    // Restore the interrupt status before converting to an IOException so callers
    // further up the stack can still observe that the thread was interrupted.
    Thread.currentThread().interrupt();
    throw (InterruptedIOException) new InterruptedIOException().initCause(e);
  }
}
/**
 * Prepares {@code job} for an incremental load into {@code table}, namely:
 * <ul>
 * <li>Inspecting the table to configure a total order partitioner</li>
 * <li>Uploading the partitions file to the cluster and adding it to the DistributedCache</li>
 * <li>Setting the number of reduce tasks to match the current number of regions</li>
 * <li>Setting the output key/value classes to match HFileOutputFormat2's requirements</li>
 * <li>Setting up the reducer to perform the appropriate sorting (KeyValueSortReducer or
 * PutSortReducer)</li>
 * </ul>
 * Be sure to set the map output value class to either KeyValue or Put before calling this.
 *
 * @param job job being configured
 * @param table source of the table descriptor and region locator
 * @throws IOException if table metadata cannot be read
 * @deprecated Use {@link #configureIncrementalLoad(Job, Table, RegionLocator)} instead.
 */
@Deprecated
public static void configureIncrementalLoad(Job job, HTable table) throws IOException {
  // Thin shim over the replacement overload.
  configureIncrementalLoad(job, table.getTableDescriptor(), table.getRegionLocator());
}
/**
 * Configure a MapReduce Job to perform an incremental load into the given table. This
 * <ul>
 * <li>Inspects the table to configure a total order partitioner</li>
 * <li>Uploads the partitions file to the cluster and adds it to the DistributedCache</li>
 * <li>Sets the number of reduce tasks to match the current number of regions</li>
 * <li>Sets the output key/value class to match HFileOutputFormat2's requirements</li>
 * <li>Sets the reducer up to perform the appropriate sorting (either KeyValueSortReducer
 * or PutSortReducer)</li>
 * </ul>
 * The user should be sure to set the map output value class to either KeyValue or Put
 * before running this function.
 *
 * @param job the MapReduce job to configure
 * @param table the target table; supplies the descriptor and region locator
 * @throws IOException if table metadata cannot be read
 */
public static void configureIncrementalLoad(Job job, HTable table) throws IOException {
  // Delegates to HFileOutputFormat2 (the original javadoc said "HFileOutputFormat",
  // which did not match this call — corrected above).
  HFileOutputFormat2.configureIncrementalLoad(job, table.getTableDescriptor(), table.getRegionLocator());
}
/**
 * Computes the size of each region for the table and given column families.
 *
 * @param table table whose region sizes are computed; its configuration is used to
 *        create a temporary admin connection
 * @throws IOException if the admin connection or size computation fails
 * @deprecated Use {@link #RegionSizeCalculator(RegionLocator, Admin)} instead.
 */
@Deprecated
public RegionSizeCalculator(HTable table) throws IOException {
  // The admin is created only for this call, so scope it with try-with-resources;
  // it is closed whether or not init() throws, exactly as the old try/finally did.
  try (HBaseAdmin admin = new HBaseAdmin(table.getConfiguration())) {
    init(table.getRegionLocator(), admin);
  }
}
/**
 * Processes a List of Puts and writes them to an HTable instance in RegionServer buckets
 * via the htable.put method. This utilizes the writeBuffer, so the writeBuffer flush
 * frequency may be tuned accordingly via htable.setWriteBufferSize.
 * <br><br>
 * The benefit of submitting Puts in this manner is to minimize the number of RegionServer
 * RPCs in each flush.
 * <br><br>
 * Assumption #1: Regions have been pre-created for the table. If they haven't, all of the
 * Puts will go to the same region, defeating the purpose of this utility method. See the
 * Apache HBase book for an explanation of how to do this.
 * <br>
 * Assumption #2: Row-keys are not monotonically increasing. See the Apache HBase book for
 * an explanation of this problem.
 * <br>
 * Assumption #3: The input list of Puts is big enough to be useful (in the thousands or
 * more). The intent of this method is to process larger chunks of data.
 * <br>
 * Assumption #4: htable.setAutoFlush(false) has been set. This is a requirement to use
 * the writeBuffer.
 * <br><br>
 * @param htable HTable instance for target HBase table
 * @param puts List of Put instances
 * @throws IOException if a remote or network exception occurs
 */
public static void bucketRsPut(HTable htable, List<Put> puts) throws IOException {
  // Bucket the puts by hosting RegionServer, then submit one batch per server.
  final Map<String, List<Put>> perServerPuts = createRsPutMap(htable.getRegionLocator(), puts);
  for (final List<Put> serverBatch : perServerPuts.values()) {
    htable.put(serverBatch);
  }
  // Push anything still sitting in the write buffer.
  htable.flushCommits();
}
// NOTE(review): fragment of a larger method — the if-block is never closed here and the
// loop index `i` is declared outside this view. Code left byte-identical; comments only.
final Pair<byte[][], byte[][]> keys = table.getRegionLocator().getStartEndKeys();
// Guard: work is partitioned per region, so an empty/unresolvable region list is fatal.
if (keys == null || keys.getFirst() == null || keys.getFirst().length == 0) {
  throw new IOException("Expecting at least one region.");
final byte[] startKey = keys.getFirst()[i];
final byte[] endKey = keys.getSecond()[i];
// false = allow the cached region location; presumably locality info for split placement — confirm.
final String regionLocation = table.getRegionLocator().getRegionLocation(startKey, false).getHostnamePort();
// NOTE(review): fragment of a larger method — the if-block is never closed here and the
// loop index `i` is declared outside this view. Code left byte-identical; comments only.
final Pair<byte[][], byte[][]> keys = table.getRegionLocator().getStartEndKeys();
// Guard: work is partitioned per region, so an empty/unresolvable region list is fatal.
if (keys == null || keys.getFirst() == null || keys.getFirst().length == 0) {
  throw new IOException("Expecting at least one region.");
final byte[] startKey = keys.getFirst()[i];
final byte[] endKey = keys.getSecond()[i];
// false = allow the cached region location; presumably locality info for split placement — confirm.
final String regionLocation = table.getRegionLocator().getRegionLocation(startKey, false).getHostnamePort();
// NOTE(review): fragment — the enclosing do/while loop (and its termination condition)
// is not visible here. Code left byte-identical; comments only.
byte[] currentKey = startKey;
do {
  // `reload` presumably forces a fresh meta lookup instead of the location cache — TODO confirm.
  HRegionLocation regionLocation = getRegionLocator().getRegionLocation(currentKey, reload);
  keysInRange.add(currentKey);
  regionsInRange.add(regionLocation);
// NOTE(review): fragment of a constructor/initializer — the try/catch is not closed in
// this view and the catch body is missing. Code left byte-identical; comments only.
this.connection = table.getConnection();
try {
  this.regionLocator = table.getRegionLocator();
  this.admin = this.connection.getAdmin();
} catch (NeedUnmanagedConnectionException exception) {
/**
 * Returns an {@link HRegionFileSystem} for the only region of {@code table}, verifying
 * the expected test layout along the way (one region dir, two family dirs).
 *
 * @param table the test table (expected to have exactly one region)
 * @param conf configuration used to construct the region filesystem
 * @return a region filesystem rooted at the table's lone region
 * @throws IOException if the DFS layout cannot be inspected
 */
private HRegionFileSystem getHRegionFS(HTable table, Configuration conf) throws IOException {
  final FileSystem fileSystem = TEST_UTIL.getDFSCluster().getFileSystem();
  final Path tableDir =
      FSUtils.getTableDir(TEST_UTIL.getDefaultRootDirPath(), table.getName());
  final List<Path> regionDirs = FSUtils.getRegionDirs(fileSystem, tableDir);
  assertEquals(1, regionDirs.size());
  // Layout check only; the family dirs themselves are not used further.
  final List<Path> familyDirs = FSUtils.getFamilyDirs(fileSystem, regionDirs.get(0));
  assertEquals(2, familyDirs.size());
  final RegionInfo info =
      table.getRegionLocator().getAllRegionLocations().get(0).getRegionInfo();
  return new HRegionFileSystem(conf, new HFileSystem(fileSystem), tableDir, info);
}
/**
 * Truncates an HTable while preserving the region pre-splits.
 *
 * @param table HTable to truncate; it is closed by this method
 * @return new instance of the truncated HTable
 * @throws IOException in case of any HBase IO problems
 */
public static HTable truncateTable(HTable table) throws IOException {
  final Configuration conf = table.getConfiguration();
  // Region start keys double as the split points for the re-created table; the first
  // region's start key is the empty byte array, which createTable must not receive.
  byte[][] splitPoints = table.getRegionLocator().getStartKeys();
  if (splitPoints.length > 0 && splitPoints[0].length == 0) {
    splitPoints = Arrays.copyOfRange(splitPoints, 1, splitPoints.length);
  }
  final HTableDescriptor descriptor = table.getTableDescriptor();
  table.close();
  // Drop and re-create the table under a short-lived connection/admin pair; both are
  // closed (admin first) when the block exits.
  try (Connection con = ConnectionFactory.createConnection(conf);
      Admin admin = con.getAdmin()) {
    admin.disableTable(descriptor.getTableName());
    admin.deleteTable(descriptor.getTableName());
    admin.createTable(descriptor, splitPoints);
  }
  return HalyardTableUtils.getTable(conf, descriptor.getTableName().getNameAsString(), false, 0);
}
// NOTE(review): fragment — the first statement's result is discarded, which suggests the
// assignment target (`region`) was truncated from this view; the for-loop body is also
// missing. Code left byte-identical; comments only.
table.getRegionLocator().getRegionLocation(row).getRegion();
ServerName currentServer = cluster.getServerHoldingRegion(tn, region.getRegionName());
for (RegionServerThread rst : cluster.getLiveRegionServerThreads()) {