/** * It computes average while fetching sum and row count from all the * corresponding regions. Approach is to compute a global sum of region level * sum and rowcount and then compute the average. * @param tableName the name of the table to scan * @param scan the HBase scan object to use to read data from HBase * @throws Throwable The caller is supposed to handle the exception as they are thrown * & propagated to it. */ private <R, S, P extends Message, Q extends Message, T extends Message> Pair<S, Long> getAvgArgs( final TableName tableName, final ColumnInterpreter<R, S, P, Q, T> ci, final Scan scan) throws Throwable { try (Table table = connection.getTable(tableName)) { return getAvgArgs(table, ci, scan); } }
Pair<NavigableMap<byte[], List<S>>, List<S>> p = getMedianArgs(table, ci, scan); byte[] startRow = null; byte[] colFamily = scan.getFamilies()[0];
/** * This is the client side interface/handle for calling the std method for a * given cf-cq combination. It was necessary to add one more call stack as its * return type should be a decimal value, irrespective of what * columninterpreter says. So, this methods collects the necessary parameters * to compute the std and returns the double value. * @param table table to scan. * @param ci the user's ColumnInterpreter implementation * @param scan the HBase scan object to use to read data from HBase * @return <R, S> * @throws Throwable The caller is supposed to handle the exception as they are thrown * & propagated to it. */ public <R, S, P extends Message, Q extends Message, T extends Message> double std( final Table table, ColumnInterpreter<R, S, P, Q, T> ci, Scan scan) throws Throwable { Pair<List<S>, Long> p = getStdArgs(table, ci, scan); double res = 0d; double avg = ci.divideForAvg(p.getFirst().get(0), p.getSecond()); double avgOfSumSq = ci.divideForAvg(p.getFirst().get(1), p.getSecond()); res = avgOfSumSq - (avg) * (avg); // variance res = Math.pow(res, 0.5); return res; }
/** * It gives the row count, by summing up the individual results obtained from * regions. In case the qualifier is null, FirstKeyValueFilter is used to * optimised the operation. In case qualifier is provided, I can't use the * filter as it may set the flag to skip to next row, but the value read is * not of the given filter: in this case, this particular row will not be * counted ==> an error. * @param tableName the name of the table to scan * @param ci the user's ColumnInterpreter implementation * @param scan the HBase scan object to use to read data from HBase * @return <R, S> * @throws Throwable The caller is supposed to handle the exception as they are thrown * & propagated to it. */ public <R, S, P extends Message, Q extends Message, T extends Message> long rowCount( final TableName tableName, final ColumnInterpreter<R, S, P, Q, T> ci, final Scan scan) throws Throwable { try (Table table = connection.getTable(tableName)) { return rowCount(table, ci, scan); } }
private static long countNumberOfRows(HConnection connection, byte[] tableName, byte[] columnFamily) throws Throwable { AggregationClient aggregationClient = new AggregationClient(connection.getConfiguration()); Scan scan = new Scan(); scan.addFamily(columnFamily); return aggregationClient.rowCount(tableName, null, scan); }
/** * It sums up the value returned from various regions. In case qualifier is * null, summation of all the column qualifiers in the given family is done. * @param tableName the name of the table to scan * @param ci the user's ColumnInterpreter implementation * @param scan the HBase scan object to use to read data from HBase * @return sum <S> * @throws Throwable The caller is supposed to handle the exception as they are thrown * & propagated to it. */ public <R, S, P extends Message, Q extends Message, T extends Message> S sum( final TableName tableName, final ColumnInterpreter<R, S, P, Q, T> ci, final Scan scan) throws Throwable { try (Table table = connection.getTable(tableName)) { return sum(table, ci, scan); } }
/** * This is the client side interface/handler for calling the median method for a * given cf-cq combination. This method collects the necessary parameters * to compute the median and returns the median. * @param tableName the name of the table to scan * @param ci the user's ColumnInterpreter implementation * @param scan the HBase scan object to use to read data from HBase * @return R the median * @throws Throwable The caller is supposed to handle the exception as they are thrown * & propagated to it. */ public <R, S, P extends Message, Q extends Message, T extends Message> R median(final TableName tableName, ColumnInterpreter<R, S, P, Q, T> ci, Scan scan) throws Throwable { try (Table table = connection.getTable(tableName)) { return median(table, ci, scan); } }
/** * It gives the maximum value of a column for a given column family for the * given range. In case qualifier is null, a max of all values for the given * family is returned. * @param tableName the name of the table to scan * @param ci the user's ColumnInterpreter implementation * @param scan the HBase scan object to use to read data from HBase * @return max val <R> * @throws Throwable The caller is supposed to handle the exception as they are thrown * & propagated to it. */ public <R, S, P extends Message, Q extends Message, T extends Message> R max( final TableName tableName, final ColumnInterpreter<R, S, P, Q, T> ci, final Scan scan) throws Throwable { try (Table table = connection.getTable(tableName)) { return max(table, ci, scan); } }
/** * It gives the minimum value of a column for a given column family for the * given range. In case qualifier is null, a min of all values for the given * family is returned. * @param tableName the name of the table to scan * @param ci the user's ColumnInterpreter implementation * @param scan the HBase scan object to use to read data from HBase * @return min val <R> * @throws Throwable The caller is supposed to handle the exception as they are thrown * & propagated to it. */ public <R, S, P extends Message, Q extends Message, T extends Message> R min( final TableName tableName, final ColumnInterpreter<R, S, P, Q, T> ci, final Scan scan) throws Throwable { try (Table table = connection.getTable(tableName)) { return min(table, ci, scan); } }
/** * This is the client side interface/handle for calling the std method for a * given cf-cq combination. It was necessary to add one more call stack as its * return type should be a decimal value, irrespective of what * columninterpreter says. So, this methods collects the necessary parameters * to compute the std and returns the double value. * @param tableName the name of the table to scan * @param ci the user's ColumnInterpreter implementation * @param scan the HBase scan object to use to read data from HBase * @return <R, S> * @throws Throwable The caller is supposed to handle the exception as they are thrown * & propagated to it. */ public <R, S, P extends Message, Q extends Message, T extends Message> double std(final TableName tableName, ColumnInterpreter<R, S, P, Q, T> ci, Scan scan) throws Throwable { try (Table table = connection.getTable(tableName)) { return std(table, ci, scan); } }
<R, S, P extends Message, Q extends Message, T extends Message> AggregateRequest validateArgAndGetPB(Scan scan, ColumnInterpreter<R,S,P,Q,T> ci, boolean canFamilyBeAbsent) throws IOException { validateParameters(scan, canFamilyBeAbsent); final AggregateRequest.Builder requestBuilder = AggregateRequest.newBuilder(); requestBuilder.setInterpreterClassName(ci.getClass().getCanonicalName()); P columnInterpreterSpecificData = null; if ((columnInterpreterSpecificData = ci.getRequestData()) != null) { requestBuilder.setInterpreterSpecificBytes(columnInterpreterSpecificData.toByteString()); } requestBuilder.setScan(ProtobufUtil.toScan(scan)); return requestBuilder.build(); }
/** * It gives the row count, by summing up the individual results obtained from * regions. In case the qualifier is null, FirstKeyValueFilter is used to * optimised the operation. In case qualifier is provided, I can't use the * filter as it may set the flag to skip to next row, but the value read is * not of the given filter: in this case, this particular row will not be * counted ==> an error. * @param tableName * @param ci * @param scan * @return <R, S> * @throws Throwable */ public <R, S, P extends Message, Q extends Message, T extends Message> long rowCount( final TableName tableName, final ColumnInterpreter<R, S, P, Q, T> ci, final Scan scan) throws Throwable { try (Table table = connection.getTable(tableName)) { return rowCount(table, ci, scan); } }
private static long countNumberOfRows(HConnection connection, byte[] tableName, byte[] columnFamily) throws Throwable { AggregationClient aggregationClient = new AggregationClient(connection.getConfiguration()); Scan scan = new Scan(); scan.addFamily(columnFamily); return aggregationClient.rowCount(tableName, null, scan); }
/** * It sums up the value returned from various regions. In case qualifier is * null, summation of all the column qualifiers in the given family is done. * @param tableName * @param ci * @param scan * @return sum <S> * @throws Throwable */ public <R, S, P extends Message, Q extends Message, T extends Message> S sum( final TableName tableName, final ColumnInterpreter<R, S, P, Q, T> ci, final Scan scan) throws Throwable { try (Table table = connection.getTable(tableName)) { return sum(table, ci, scan); } }
/** * This is the client side interface/handler for calling the median method for a * given cf-cq combination. This method collects the necessary parameters * to compute the median and returns the median. * @param tableName * @param ci * @param scan * @return R the median * @throws Throwable */ public <R, S, P extends Message, Q extends Message, T extends Message> R median(final TableName tableName, ColumnInterpreter<R, S, P, Q, T> ci, Scan scan) throws Throwable { try (Table table = connection.getTable(tableName)) { return median(table, ci, scan); } }
/** * It gives the maximum value of a column for a given column family for the * given range. In case qualifier is null, a max of all values for the given * family is returned. * @param tableName the name of the table to scan * @param ci the user's ColumnInterpreter implementation * @param scan the HBase scan object to use to read data from HBase * @return max val <R> * @throws Throwable The caller is supposed to handle the exception as they are thrown * & propagated to it. */ public <R, S, P extends Message, Q extends Message, T extends Message> R max( final TableName tableName, final ColumnInterpreter<R, S, P, Q, T> ci, final Scan scan) throws Throwable { try (Table table = connection.getTable(tableName)) { return max(table, ci, scan); } }
/** * It gives the minimum value of a column for a given column family for the * given range. In case qualifier is null, a min of all values for the given * family is returned. * @param tableName * @param ci * @param scan * @return min val <R> * @throws Throwable */ public <R, S, P extends Message, Q extends Message, T extends Message> R min( final TableName tableName, final ColumnInterpreter<R, S, P, Q, T> ci, final Scan scan) throws Throwable { try (Table table = connection.getTable(tableName)) { return min(table, ci, scan); } }
/** * This is the client side interface/handle for calling the std method for a * given cf-cq combination. It was necessary to add one more call stack as its * return type should be a decimal value, irrespective of what * columninterpreter says. So, this methods collects the necessary parameters * to compute the std and returns the double value. * @param tableName the name of the table to scan * @param ci the user's ColumnInterpreter implementation * @param scan the HBase scan object to use to read data from HBase * @return <R, S> * @throws Throwable The caller is supposed to handle the exception as they are thrown * & propagated to it. */ public <R, S, P extends Message, Q extends Message, T extends Message> double std(final TableName tableName, ColumnInterpreter<R, S, P, Q, T> ci, Scan scan) throws Throwable { try (Table table = connection.getTable(tableName)) { return std(table, ci, scan); } }
getMedianArgs(final byte[] tableName, final ColumnInterpreter<R, S> ci, final Scan scan) throws Throwable { validateParameters(scan); final NavigableMap<byte[], List<S>> map = new TreeMap<byte[], List<S>>(Bytes.BYTES_COMPARATOR);
/** * This is the client side interface/handle for calling the average method for * a given cf-cq combination. It was necessary to add one more call stack as * its return type should be a decimal value, irrespective of what * columninterpreter says. So, this methods collects the necessary parameters * to compute the average and returs the double value. * @param table table to scan. * @param ci the user's ColumnInterpreter implementation * @param scan the HBase scan object to use to read data from HBase * @return <R, S> * @throws Throwable The caller is supposed to handle the exception as they are thrown * & propagated to it. */ public <R, S, P extends Message, Q extends Message, T extends Message> double avg( final Table table, final ColumnInterpreter<R, S, P, Q, T> ci, Scan scan) throws Throwable { Pair<S, Long> p = getAvgArgs(table, ci, scan); return ci.divideForAvg(p.getFirst(), p.getSecond()); }