/**
 * Builds a lookup from each field partitioner's name to its position in
 * the list of partitioners returned for the given strategy.
 *
 * @param strategy the PartitionStrategy whose partitioners are indexed
 * @return a map of partitioner name to its index in the strategy
 */
@Override
public Map<String, Integer> load(PartitionStrategy strategy) {
  final List<FieldPartitioner> partitioners =
      Accessor.getDefault().getFieldPartitioners(strategy);
  final Map<String, Integer> positions =
      Maps.newHashMapWithExpectedSize(partitioners.size());

  // walk the partitioners in order, recording each name's position
  int position = 0;
  for (FieldPartitioner partitioner : partitioners) {
    positions.put(partitioner.getName(), position);
    position += 1;
  }
  return positions;
}
});
/**
 * Looks up the value for a partition field among the collected values.
 *
 * A value stored under the partitioner's own name is converted to the
 * partitioner's type and returned. Otherwise, a value stored under the
 * partitioner's source name is converted to the source type and passed
 * through the partitioner.
 *
 * @param fp a FieldPartitioner
 * @return the value for the partition field
 * @throws IllegalStateException if there is no value under either name
 */
private <S, T> T valueFor(FieldPartitioner<S, T> fp) {
  final String partitionName = fp.getName();
  if (values.containsKey(partitionName)) {
    // the partition value itself was supplied
    return Conversions.convert(values.get(partitionName), fp.getType());
  }

  final String sourceName = fp.getSourceName();
  if (!values.containsKey(sourceName)) {
    throw new IllegalStateException(
        "Cannot create StorageKey, missing data for field:" + partitionName);
  }

  // only the source value was supplied: convert it, then partition it
  return fp.apply(
      Conversions.convert(values.get(sourceName), fp.getSourceType()));
}
}
/**
 * Looks up the value for a partition field among the collected values,
 * using the entity schema to determine the types to convert to.
 *
 * A value stored under the partitioner's own name is converted to the
 * partition type and returned. Otherwise, a value stored under the
 * partitioner's source name is converted to the source type and passed
 * through the partitioner.
 *
 * @param fp a FieldPartitioner
 * @return the value for the partition field
 * @throws IllegalStateException if there is no value under either name
 */
@SuppressWarnings("unchecked")
private <S, T> T valueFor(FieldPartitioner<S, T> fp) {
  final String partitionName = fp.getName();
  if (values.containsKey(partitionName)) {
    // the partition value itself was supplied
    return Conversions.convert(
        values.get(partitionName), SchemaUtil.getPartitionType(fp, schema));
  }

  final String sourceName = fp.getSourceName();
  if (!values.containsKey(sourceName)) {
    throw new IllegalStateException(
        "Cannot create Key, missing data for field:" + partitionName);
  }

  // only the source value was supplied: convert it, then partition it
  return fp.apply(Conversions.convert(
      values.get(sourceName), SchemaUtil.getSourceType(fp, schema)));
}
}
/**
 * Creates the Schema.Field that represents a FieldPartitioner. The field
 * takes the partitioner's name; its schema comes from
 * {@code partitionFieldSchema}, which uses the entity Schema to resolve
 * types the partitioner does not fix on its own.
 *
 * @param fp a FieldPartitioner
 * @param schema an entity Schema that will be partitioned
 * @return a Schema.Field named after the field partitioner
 */
private static Schema.Field partitionField(FieldPartitioner<?, ?> fp,
                                           Schema schema) {
  final String fieldName = fp.getName();
  final Schema fieldSchema = partitionFieldSchema(fp, schema);
  // the last two constructor arguments are intentionally left null
  return new Schema.Field(fieldName, fieldSchema, null, null);
}
/**
 * Builds the directory name for a partition value by joining the field's
 * name with the string form of the value, using {@code PART_JOIN}.
 *
 * @param field the FieldPartitioner the value belongs to
 * @param value the partition value
 * @return the joined directory name
 */
public static <T> String dirnameForValue(FieldPartitioner<?, T> field,
                                         T value) {
  final String partitionName = field.getName();
  return PART_JOIN.join(partitionName, valueToString(field, value));
}
/**
 * Produces the partition value for one field partitioner.
 *
 * For a ProvidedFieldPartitioner the value is read from the provided map
 * under the partitioner's name. For any other partitioner the value is
 * computed by applying the partitioner to the entity's source field.
 *
 * @param object the entity to read the source field from
 * @param provided provided values by partition name, may be null
 * @param fp the field partitioner
 * @return the partition value
 * @throws IllegalArgumentException if a provided partitioner has no entry
 *         in the provided map
 */
@SuppressWarnings("unchecked")
@edu.umd.cs.findbugs.annotations.SuppressWarnings(value="NP_NULL_ON_SOME_PATH", justification="Null case checked by precondition")
private Object partitionValue(E object,
                              @Nullable Map<String, Object> provided,
                              FieldPartitioner fp) {
  if (!(fp instanceof ProvidedFieldPartitioner)) {
    // derived partition: compute it from the entity's source field
    return fp.apply(get(object, fp.getSourceName()));
  }

  final String partitionName = fp.getName();
  final boolean hasProvidedValue =
      (provided != null) && provided.containsKey(partitionName);
  Preconditions.checkArgument(hasProvidedValue,
      "Cannot construct key, missing provided value: %s", partitionName);
  return provided.get(partitionName);
}
/**
 * Validates that one partition strategy may replace another.
 *
 * Both strategies must have the same number of field partitioners.
 * Partitioners are compared position by position; a pair that is not
 * equal is accepted only if the existing partitioner is a
 * ProvidedFieldPartitioner, both have the same name, and the existing
 * type passes {@code isCompatibleWithProvidedType} against the
 * replacement's partition type.
 *
 * @param existing the current PartitionStrategy
 * @param other the proposed replacement PartitionStrategy
 * @param schema the entity Schema used to resolve the replacement's
 *               partition types
 * @throws ValidationException if any of the checks above fails
 */
public static void checkStrategyUpdate(PartitionStrategy existing,
                                       PartitionStrategy other,
                                       Schema schema) {
  List<FieldPartitioner> currentFields =
      Accessor.getDefault().getFieldPartitioners(existing);
  List<FieldPartitioner> proposedFields =
      Accessor.getDefault().getFieldPartitioners(other);

  ValidationException.check(currentFields.size() == proposedFields.size(),
      "Not compatible: cannot replace %s partitioners with %s partitioners",
      currentFields.size(), proposedFields.size());

  for (int pos = 0; pos < currentFields.size(); pos += 1) {
    FieldPartitioner current = currentFields.get(pos);
    FieldPartitioner proposed = proposedFields.get(pos);

    if (!current.equals(proposed)) {
      final String currentName = current.getName();

      // only provided partitioners may be swapped out
      ValidationException.check(current instanceof ProvidedFieldPartitioner,
          "Cannot replace partition %s: not a provided partitioner",
          currentName);

      final String proposedName = proposed.getName();
      ValidationException.check(currentName.equals(proposedName),
          "Cannot change the name of partition %s (to %s)",
          currentName, proposedName);

      Class<?> proposedType = SchemaUtil.getPartitionType(proposed, schema);
      ValidationException.check(
          isCompatibleWithProvidedType(current.getType(), proposedType),
          "Cannot change the data type of partition %s", currentName);
    }
  }
}
/**
 * Creates a partition strategy from an ordered list of field partitioners.
 * The list is copied into an immutable list, and a second immutable map
 * indexes the same partitioners by name.
 *
 * @param partitioners the field partitioners, in partition order
 */
PartitionStrategy(List<FieldPartitioner> partitioners) {
  this.fieldPartitioners = ImmutableList.copyOf(partitioners);

  // index the partitioners by name
  final ImmutableMap.Builder<String, FieldPartitioner> byName =
      ImmutableMap.builder();
  for (FieldPartitioner partitioner : partitioners) {
    byName.put(partitioner.getName(), partitioner);
  }
  this.partitionerMap = byName.build();
}
/**
 * Creates a {@link Builder} for a {@link RandomAccessDataset}, taking the
 * schema and partition strategy from the dataset's descriptor. The set of
 * field names holds both the source name and the partition name of every
 * field partitioner in that strategy.
 *
 * @param dataset the dataset whose descriptor configures this builder
 */
public Builder(RandomAccessDataset dataset) {
  this.schema = dataset.getDescriptor().getSchema();
  this.strategy = dataset.getDescriptor().getPartitionStrategy();
  this.values = Maps.newHashMap();

  // collect both names of each partitioner
  this.fieldNames = Sets.newHashSet();
  for (FieldPartitioner partitioner : strategy.getFieldPartitioners()) {
    fieldNames.add(partitioner.getSourceName());
    fieldNames.add(partitioner.getName());
  }
}
/**
 * Builds one FieldSchema per field partitioner in the strategy. Each
 * column is named after the partitioner, typed from the partitioner's
 * partition type, and commented with the source column it is derived from.
 *
 * @param strategy the PartitionStrategy to describe
 * @param schema the entity Schema used to resolve partition types
 * @return the partition columns, in partitioner order
 */
@SuppressWarnings("deprecation")
static List<FieldSchema> partitionColumns(PartitionStrategy strategy,
                                          Schema schema) {
  List<FieldSchema> columns = Lists.newArrayList();
  for (FieldPartitioner<?, ?> fp :
      Accessor.getDefault().getFieldPartitioners(strategy)) {
    final String columnName = fp.getName();
    final String hiveType =
        getHiveType(SchemaUtil.getPartitionType(fp, schema));
    final String comment =
        "Partition column derived from '" + fp.getSourceName() + "' column, " +
            "generated by Kite.";
    columns.add(new FieldSchema(columnName, hiveType, comment));
  }
  return columns;
}
/**
 * Describes the partition columns for a strategy. One FieldSchema is
 * produced per field partitioner: it carries the partitioner's name, a type
 * derived from its partition type, and a comment naming its source column.
 *
 * @param strategy the PartitionStrategy to describe
 * @param schema the entity Schema used to resolve partition types
 * @return the partition columns, in partitioner order
 */
@SuppressWarnings("deprecation")
static List<FieldSchema> partitionColumns(PartitionStrategy strategy,
                                          Schema schema) {
  List<FieldSchema> columns = Lists.newArrayList();
  for (FieldPartitioner<?, ?> fp :
      Accessor.getDefault().getFieldPartitioners(strategy)) {
    final String partitionName = fp.getName();
    final String typeName =
        getHiveType(SchemaUtil.getPartitionType(fp, schema));
    final String description =
        "Partition column derived from '" + fp.getSourceName() + "' column, " +
            "generated by Kite.";
    columns.add(new FieldSchema(partitionName, typeName, description));
  }
  return columns;
}
/**
 * Return the value of a {@code FieldPartitioner} field for this
 * {@link Marker}.
 *
 * If the {@code Marker} has a value under the field's name, that value is
 * returned using {@link Marker#getAs(java.lang.String, java.lang.Class)}
 * with the field's type. If the {@code Marker} only has a value under the
 * source field name, that value is retrieved with the same method using the
 * source type, and the field's transformation is applied to it.
 *
 * @param fp a {@code FieldPartitioner}
 * @return the value of the field for this {@code Marker}, or null if it has
 *         a value under neither name
 * @since 0.9.0
 */
@Nullable
public <S, T> T valueFor(FieldPartitioner<S, T> fp) {
  final String partitionName = fp.getName();
  if (has(partitionName)) {
    return getAs(partitionName, fp.getType());
  }

  final String sourceName = fp.getSourceName();
  if (has(sourceName)) {
    // derive the partition value from the source value
    return fp.apply(getAs(sourceName, fp.getSourceType()));
  }

  return null;
}
/**
 * Evaluates an expression with two hash partitioners, checks the class,
 * name and cardinality of each one, and checks that the strategy converts
 * back to the same expression.
 */
@Test
public void testSubpartitionStrategy() {
  String expr = "[hash(\"username\", \"username_part\", 2), hash(\"username2\", " +
      "\"username2_part\", 3)]";
  PartitionStrategy strategy = new PartitionExpression(expr, true).evaluate();

  List<FieldPartitioner> partitioners =
      Accessor.getDefault().getFieldPartitioners(strategy);
  Assert.assertEquals(2, partitioners.size());

  FieldPartitioner first = partitioners.get(0);
  Assert.assertEquals(HashFieldPartitioner.class, first.getClass());
  Assert.assertEquals("username_part", first.getName());
  Assert.assertEquals(2, first.getCardinality());

  FieldPartitioner second = partitioners.get(1);
  Assert.assertEquals(HashFieldPartitioner.class, second.getClass());
  Assert.assertEquals("username2_part", second.getName());
  Assert.assertEquals(3, second.getCardinality());

  // the strategy must round-trip to the original expression
  Assert.assertEquals(expr, PartitionExpression.toExpression(strategy));
}
/**
 * Evaluates an expression with a year and a minute partitioner on the same
 * source field, checks the class, source name and name of each one, and
 * checks that the strategy converts back to the same expression.
 */
@Test
public void testMixedSubpartitionStrategy() {
  String expr = "[year(\"timestamp\", \"year\"), minute(\"timestamp\", \"minute\")]";
  PartitionStrategy strategy = new PartitionExpression(expr, true).evaluate();

  List<FieldPartitioner> partitioners =
      Accessor.getDefault().getFieldPartitioners(strategy);
  Assert.assertEquals(2, partitioners.size());

  FieldPartitioner yearPartitioner = partitioners.get(0);
  Assert.assertEquals(YearFieldPartitioner.class, yearPartitioner.getClass());
  Assert.assertEquals("timestamp", yearPartitioner.getSourceName());
  Assert.assertEquals("year", yearPartitioner.getName());

  FieldPartitioner minutePartitioner = partitioners.get(1);
  Assert.assertEquals(MinuteFieldPartitioner.class, minutePartitioner.getClass());
  Assert.assertEquals("timestamp", minutePartitioner.getSourceName());
  Assert.assertEquals("minute", minutePartitioner.getName());

  // the strategy must round-trip to the original expression
  Assert.assertEquals(expr, PartitionExpression.toExpression(strategy));
}
/** * Builds a Schema for the FieldPartitioner using the given Schema to * determine types not fixed by the FieldPartitioner. * * @param fp a FieldPartitioner * @param schema an entity Schema that will be partitioned * @return a Schema for the field partitioner */ public static Schema partitionFieldSchema(FieldPartitioner<?, ?> fp, Schema schema) { if (fp instanceof IdentityFieldPartitioner) { // copy the schema directly from the entity to preserve annotations return fieldSchema(schema, fp.getSourceName()); } else { Class<?> fieldType = getPartitionType(fp, schema); if (fieldType == Integer.class) { return Schema.create(Schema.Type.INT); } else if (fieldType == Long.class) { return Schema.create(Schema.Type.LONG); } else if (fieldType == String.class) { return Schema.create(Schema.Type.STRING); } else { throw new ValidationException( "Cannot encode partition " + fp.getName() + " with type " + fp.getSourceType() ); } } }
@Test public void test() throws Exception { final PartitionStrategy p = new PartitionStrategy.Builder() .identity("month", "month_ordinal", 12) .hash("userId", 7) .build(); List<FieldPartitioner> fieldPartitioners = p.getFieldPartitioners(); Assert.assertEquals(2, fieldPartitioners.size()); FieldPartitioner fp0 = fieldPartitioners.get(0); assertEquals("month_ordinal", fp0.getName()); assertEquals(12, fp0.getCardinality()); FieldPartitioner fp1 = fieldPartitioners.get(1); assertEquals("userId_hash", fp1.getName()); assertEquals(7, fp1.getCardinality()); assertEquals(12 * 7, p.getCardinality()); // useful for writers }
/**
 * Evaluates an expression with a single hash partitioner, checks its
 * class, source name, name and cardinality, and checks that the strategy
 * converts back to the same expression.
 */
@Test
public void testPartitionStrategy() {
  String expr = "hash(\"username\", \"username_part\", 2)";
  PartitionStrategy strategy = new PartitionExpression(expr, true).evaluate();

  List<FieldPartitioner> partitioners =
      Accessor.getDefault().getFieldPartitioners(strategy);
  Assert.assertEquals(1, partitioners.size());

  FieldPartitioner only = partitioners.get(0);
  Assert.assertEquals(HashFieldPartitioner.class, only.getClass());
  Assert.assertEquals("username", only.getSourceName());
  Assert.assertEquals("username_part", only.getName());
  Assert.assertEquals(2, only.getCardinality());

  // the strategy must round-trip to the original expression
  Assert.assertEquals(expr, PartitionExpression.toExpression(strategy));
}
Assert.assertEquals(1, fieldPartitioners.size()); Assert.assertEquals(subpartitionName, fieldPartitioners.get(0) .getName());