/**
 * <p>
 * Return the cardinality produced by the contained field partitioners.
 * </p>
 * <p>
 * This can be used to aid in calculating resource usage during certain
 * operations. For example, when writing data to a partitioned dataset, you
 * can use this method to estimate (or discover exactly, depending on the
 * partition functions) how many leaf partitions exist.
 * </p>
 * <p>
 * The result is the product of the cardinalities of the individual field
 * partitioners. If any of them reports
 * {@link FieldPartitioner#UNKNOWN_CARDINALITY}, that value is returned
 * instead. A product that does not fit in an {@code int} is clamped to the
 * {@code int} range rather than wrapping around.
 * </p>
 * <p>
 * <strong>Warning:</strong> This method is allowed to lie and should be
 * treated only as a hint. Some partition functions are fixed (for example,
 * hash modulo number of buckets), while others are open-ended (for
 * example, discrete value) and depend on the input data.
 * </p>
 *
 * @return The estimated (or possibly concrete) number of leaf partitions,
 *         or {@link FieldPartitioner#UNKNOWN_CARDINALITY} if at least one
 *         field partitioner has an unknown cardinality.
 */
public int getCardinality() {
  // Accumulate in a long: two values inside the int range always multiply
  // to something that fits in a long, so clamping after every step makes
  // silent wrap-around impossible.
  long product = 1L;
  for (FieldPartitioner fieldPartitioner : fieldPartitioners) {
    // Query once per partitioner and reuse the value.
    int fieldCardinality = fieldPartitioner.getCardinality();
    if (fieldCardinality == FieldPartitioner.UNKNOWN_CARDINALITY) {
      return FieldPartitioner.UNKNOWN_CARDINALITY;
    }
    // Keep iterating after saturation so that a later partitioner with an
    // unknown cardinality is still reported as unknown.
    product = Math.max(Integer.MIN_VALUE,
        Math.min(product * fieldCardinality, Integer.MAX_VALUE));
  }
  return (int) product;
}
fieldPartitioner.getName(), fieldPartitioner.getCardinality()); } else if (fieldPartitioner instanceof IdentityFieldPartitioner) { return String.format("identity(\"%s\", \"%s\", \"%s\", %s)", fieldPartitioner.getSourceName(), fieldPartitioner.getName(), fieldPartitioner.getType().getName(), fieldPartitioner.getCardinality()); } else if (fieldPartitioner instanceof RangeFieldPartitioner) { List<String> upperBounds = ((RangeFieldPartitioner) fieldPartitioner)
partitioner.set(SOURCE, TextNode.valueOf(fp.getSourceName())); partitioner.set(TYPE, TextNode.valueOf("hash")); partitioner.set(BUCKETS, LongNode.valueOf(fp.getCardinality())); } else if (fp instanceof LongFixedSizeRangeFieldPartitioner) { partitioner.set(SOURCE, TextNode.valueOf(fp.getSourceName()));
/**
 * Evaluates an expression made of two hash partitioners and checks, for each
 * resulting field partitioner, its class, name and cardinality; finally
 * checks that the strategy converts back to the original expression string.
 */
@Test
public void testSubpartitionStrategy() {
  final String source =
      "[hash(\"username\", \"username_part\", 2), hash(\"username2\", \"username2_part\", 3)]";
  final PartitionStrategy parsed = new PartitionExpression(source, true).evaluate();

  final List<FieldPartitioner> partitioners =
      Accessor.getDefault().getFieldPartitioners(parsed);

  // Expected values, listed in the order the partitioners appear in the expression.
  final String[] expectedNames = {"username_part", "username2_part"};
  final int[] expectedCardinalities = {2, 3};

  Assert.assertEquals(2, partitioners.size());
  for (int i = 0; i < expectedNames.length; i++) {
    final FieldPartitioner current = partitioners.get(i);
    Assert.assertEquals(HashFieldPartitioner.class, current.getClass());
    Assert.assertEquals(expectedNames[i], current.getName());
    Assert.assertEquals(expectedCardinalities[i], current.getCardinality());
  }

  // Round trip: the strategy must render back to the exact input text.
  Assert.assertEquals(source, PartitionExpression.toExpression(parsed));
}
@Test public void test() throws Exception { final PartitionStrategy p = new PartitionStrategy.Builder() .identity("month", "month_ordinal", 12) .hash("userId", 7) .build(); List<FieldPartitioner> fieldPartitioners = p.getFieldPartitioners(); Assert.assertEquals(2, fieldPartitioners.size()); FieldPartitioner fp0 = fieldPartitioners.get(0); assertEquals("month_ordinal", fp0.getName()); assertEquals(12, fp0.getCardinality()); FieldPartitioner fp1 = fieldPartitioners.get(1); assertEquals("userId_hash", fp1.getName()); assertEquals(7, fp1.getCardinality()); assertEquals(12 * 7, p.getCardinality()); // useful for writers }
/**
 * Evaluates an expression containing a single hash partitioner and checks the
 * resulting field partitioner's class, source name, name and cardinality;
 * finally checks that the strategy converts back to the original expression
 * string.
 */
@Test
public void testPartitionStrategy() {
  final String source = "hash(\"username\", \"username_part\", 2)";
  final PartitionStrategy parsed = new PartitionExpression(source, true).evaluate();

  final List<FieldPartitioner> partitioners =
      Accessor.getDefault().getFieldPartitioners(parsed);
  Assert.assertEquals(1, partitioners.size());

  final FieldPartitioner only = partitioners.get(0);
  Assert.assertEquals(HashFieldPartitioner.class, only.getClass());
  Assert.assertEquals("username", only.getSourceName());
  Assert.assertEquals("username_part", only.getName());
  Assert.assertEquals(2, only.getCardinality());

  // Round trip: the strategy must render back to the exact input text.
  Assert.assertEquals(source, PartitionExpression.toExpression(parsed));
}