@SuppressWarnings("deprecation") private static Collection<String[]> requiredFields(@Nullable PartitionStrategy strategy) { if (strategy == null) { return NO_REQUIRED_FIELDS; } List<String[]> requiredFields = Lists.newArrayList(); for (FieldPartitioner fp : Accessor.getDefault().getFieldPartitioners(strategy)) { // source name is not present for provided partitioners if (fp.getSourceName() != null) { requiredFields.add(fp.getSourceName().split("\\.")); } } return requiredFields; } }
@SuppressWarnings("deprecation") private static Collection<String[]> requiredFields(@Nullable PartitionStrategy strategy) { if (strategy == null) { return NO_REQUIRED_FIELDS; } List<String[]> requiredFields = Lists.newArrayList(); for (FieldPartitioner fp : Accessor.getDefault().getFieldPartitioners(strategy)) { // source name is not present for provided partitioners if (fp.getSourceName() != null) { requiredFields.add(fp.getSourceName().split("\\.")); } } return requiredFields; } }
/**
 * Resolves the Java class of the source field read by a partitioner.
 *
 * The field schema is looked up in {@code schema} by the partitioner's
 * source name, and the schema's type is mapped to a class with
 * {@code getClassForType}.
 *
 * @param fp a FieldPartitioner
 * @param schema the entity Schema containing the source field
 * @return the class mapped from the source field's schema type
 */
@SuppressWarnings("unchecked")
public static <S> Class<? extends S> getSourceType(FieldPartitioner<S, ?> fp, Schema schema) {
  String sourceName = fp.getSourceName();
  Schema sourceSchema = fieldSchema(schema, sourceName);
  // unchecked: the caller's S is assumed to match the schema-derived class
  return (Class<S>) getClassForType(sourceSchema.getType());
}
/**
 * Creates a key schema from its raw schema string and partition strategy.
 *
 * For every {@code IdentityFieldPartitioner} in the strategy, the
 * partitioner's index within the strategy is recorded in
 * {@code fieldPositions} under the partitioner's source name.
 *
 * @param rawSchema the raw schema string
 * @param partitionStrategy the strategy whose partitioners are indexed
 */
public KeySchema(String rawSchema, PartitionStrategy partitionStrategy) {
  this.rawSchema = rawSchema;
  this.partitionStrategy = partitionStrategy;
  this.fieldPositions = Maps.newHashMap();

  List<FieldPartitioner> partitioners =
      Accessor.getDefault().getFieldPartitioners(partitionStrategy);
  int position = 0;
  for (FieldPartitioner partitioner : partitioners) {
    // only identity partitioners are tracked by position
    if (partitioner instanceof IdentityFieldPartitioner) {
      fieldPositions.put(partitioner.getSourceName(), position);
    }
    position += 1;
  }
}
/**
 * Builds a key schema and indexes its identity-partitioned fields.
 *
 * Each {@code IdentityFieldPartitioner} found in the strategy contributes
 * one entry to {@code fieldPositions}: source name mapped to the
 * partitioner's index in the strategy's partitioner list.
 *
 * @param rawSchema the raw schema string
 * @param partitionStrategy the partition strategy for the key
 */
public KeySchema(String rawSchema, PartitionStrategy partitionStrategy) {
  this.rawSchema = rawSchema;
  this.partitionStrategy = partitionStrategy;
  this.fieldPositions = Maps.newHashMap();

  List<FieldPartitioner> all =
      Accessor.getDefault().getFieldPartitioners(partitionStrategy);
  int index = 0;
  for (FieldPartitioner current : all) {
    boolean isIdentity = current instanceof IdentityFieldPartitioner;
    if (isIdentity) {
      fieldPositions.put(current.getSourceName(), index);
    }
    index++;
  }
}
/**
 * Computes the partition value that one partitioner contributes to a key.
 *
 * For a {@code ProvidedFieldPartitioner} the value is taken from
 * {@code provided} under the partitioner's name; the precondition check
 * fails if {@code provided} is null or has no entry for that name. For any
 * other partitioner, the partitioner is applied to the entity's value for
 * the partitioner's source field.
 *
 * @param object the entity to read source values from
 * @param provided values for provided partitioners, keyed by partitioner
 *                 name; may be null
 * @param fp the partitioner to evaluate
 * @return the partition value
 */
@SuppressWarnings("unchecked")
@edu.umd.cs.findbugs.annotations.SuppressWarnings(
    value="NP_NULL_ON_SOME_PATH",
    justification="Null case checked by precondition")
private Object partitionValue(E object, @Nullable Map<String, Object> provided,
                              FieldPartitioner fp) {
  if (!(fp instanceof ProvidedFieldPartitioner)) {
    // derived partitioners compute their value from the entity's source field
    return fp.apply(get(object, fp.getSourceName()));
  }

  String name = fp.getName();
  boolean hasProvidedValue = (provided != null) && provided.containsKey(name);
  Preconditions.checkArgument(hasProvidedValue,
      "Cannot construct key, missing provided value: %s", name);
  return provided.get(name);
}
/**
 * Finds the value for a partitioner from the collected {@code values}.
 *
 * A value stored under the partitioner's own name wins and is converted to
 * the partitioner's type. Otherwise a value stored under the source name is
 * converted to the source type and passed through the partitioner.
 *
 * @param fp a FieldPartitioner
 * @return the converted or derived partition value
 * @throws IllegalStateException if neither name has a value
 */
private <S, T> T valueFor(FieldPartitioner<S, T> fp) {
  String name = fp.getName();
  if (values.containsKey(name)) {
    return Conversions.convert(values.get(name), fp.getType());
  }

  String sourceName = fp.getSourceName();
  if (values.containsKey(sourceName)) {
    return fp.apply(Conversions.convert(values.get(sourceName), fp.getSourceType()));
  }

  throw new IllegalStateException(
      "Cannot create StorageKey, missing data for field:" + fp.getName());
}
}
/**
 * Finds the value for a partitioner from the collected {@code values},
 * using schema-derived types for conversion.
 *
 * A value stored under the partitioner's name is converted to the type from
 * {@code SchemaUtil.getPartitionType}. Otherwise a value stored under the
 * source name is converted to the type from {@code SchemaUtil.getSourceType}
 * and passed through the partitioner.
 *
 * @param fp a FieldPartitioner
 * @return the converted or derived partition value
 * @throws IllegalStateException if neither name has a value
 */
@SuppressWarnings("unchecked")
private <S, T> T valueFor(FieldPartitioner<S, T> fp) {
  String name = fp.getName();
  if (values.containsKey(name)) {
    return Conversions.convert(values.get(name),
        SchemaUtil.getPartitionType(fp, schema));
  }

  String sourceName = fp.getSourceName();
  if (values.containsKey(sourceName)) {
    return fp.apply(Conversions.convert(values.get(sourceName),
        SchemaUtil.getSourceType(fp, schema)));
  }

  throw new IllegalStateException(
      "Cannot create Key, missing data for field:" + fp.getName());
}
}
/**
 * Builds the time domain for one source field of a partition strategy.
 *
 * Calendar partitioners whose source name equals {@code sourceName} are
 * indexed by calendar field, then collected into {@code partitioners} in
 * the sequence given by {@code order}.
 *
 * @param strategy the partition strategy to inspect
 * @param sourceName the source field whose calendar partitioners are used
 */
public TimeDomain(PartitionStrategy strategy, String sourceName) {
  Map<Integer, CalendarFieldPartitioner> byCalendarField = Maps.newHashMap();
  for (FieldPartitioner fp : Accessor.getDefault().getFieldPartitioners(strategy)) {
    // there may be partitioners for more than one source field
    if (sourceName.equals(fp.getSourceName()) && fp instanceof CalendarFieldPartitioner) {
      CalendarFieldPartitioner calendarPartitioner = (CalendarFieldPartitioner) fp;
      byCalendarField.put(calendarPartitioner.getCalendarField(), calendarPartitioner);
    }
  }

  // get the partitioners to check for this strategy
  this.partitioners = Lists.newArrayList();
  for (int field : order) {
    // the map never stores null values, so a null lookup means "no partitioner"
    CalendarFieldPartitioner match = byCalendarField.get(field);
    if (match != null) {
      partitioners.add(match);
    } else if (!partitioners.isEmpty()) {
      // if there is no partition for the next field, then all are included
      // example: yyyy/mm/dd partitioning accepts when field is hour
      break;
    }
  }
}
@SuppressWarnings("unchecked") public static <S, T> Class<? extends T> getPartitionType(FieldPartitioner<S, T> fp, Schema schema) { if (fp instanceof ProvidedFieldPartitioner) { // provided partitioners have no source field schema return fp.getType(); } Class<? extends S> inputType = (Class<S>) getClassForType( fieldSchema(schema, fp.getSourceName()).getType()); return fp.getType(inputType); }
/**
 * <p>
 * Builds the partition key for an entity. When {@code reuseKey} is not null
 * it is filled in and returned; otherwise a new key sized to the strategy's
 * partitioner count is created.
 * </p>
 * <p>
 * Each key position is set to the result of applying the partitioner at
 * that position to the entity's value for the partitioner's source field,
 * as read through {@code accessor}.
 * </p>
 *
 * @param strategy the partition strategy supplying the partitioners
 * @param entity the entity to compute a key for
 * @param accessor reads source field values from the entity
 * @param reuseKey a key to overwrite and return, or null to allocate one
 * @return the populated key
 */
@SuppressWarnings("unchecked")
public static <E> PartitionKey partitionKeyForEntity(PartitionStrategy strategy,
    E entity, EntityAccessor<E> accessor, @Nullable PartitionKey reuseKey) {
  List<FieldPartitioner> fieldPartitioners =
      Accessor.getDefault().getFieldPartitioners(strategy);

  PartitionKey key;
  if (reuseKey != null) {
    key = reuseKey;
  } else {
    key = new PartitionKey(new Object[fieldPartitioners.size()]);
  }

  int position = 0;
  for (FieldPartitioner fp : fieldPartitioners) {
    key.set(position, fp.apply(accessor.get(entity, fp.getSourceName())));
    position += 1;
  }
  return key;
}
/**
 * Construct a {@link Builder} for a {@link RandomAccessDataset}.
 *
 * The schema and partition strategy are taken from the dataset's
 * descriptor. The set of accepted field names is built from each
 * partitioner's name and, when it has one, its source name.
 *
 * @param dataset the dataset whose descriptor supplies the schema and
 *                partition strategy
 */
public Builder(RandomAccessDataset dataset) {
  this.schema = dataset.getDescriptor().getSchema();
  this.strategy = dataset.getDescriptor().getPartitionStrategy();
  this.fieldNames = Sets.newHashSet();
  for (FieldPartitioner fp : strategy.getFieldPartitioners()) {
    // a partitioner without a source field reports a null source name;
    // skip it so that null is never registered as a valid field name
    String sourceName = fp.getSourceName();
    if (sourceName != null) {
      fieldNames.add(sourceName);
    }
    fieldNames.add(fp.getName());
  }
  this.values = Maps.newHashMap();
}
/**
 * Builds one Hive partition column per partitioner in the strategy.
 *
 * Each column takes the partitioner's name, the Hive type derived from the
 * partitioner's partition type for {@code schema}, and a comment naming the
 * partitioner's source column.
 *
 * @param strategy the partition strategy to describe
 * @param schema the entity Schema used to resolve partition types
 * @return a new list of partition columns, in partitioner order
 */
@SuppressWarnings("deprecation")
static List<FieldSchema> partitionColumns(PartitionStrategy strategy, Schema schema) {
  List<FieldSchema> columns = Lists.newArrayList();
  for (FieldPartitioner<?, ?> fp : Accessor.getDefault().getFieldPartitioners(strategy)) {
    FieldSchema column = new FieldSchema(
        fp.getName(),
        getHiveType(SchemaUtil.getPartitionType(fp, schema)),
        "Partition column derived from '" + fp.getSourceName() + "' column, " +
            "generated by Kite.");
    columns.add(column);
  }
  return columns;
}
/**
 * Translates the partitioners of a strategy into Hive partition columns.
 *
 * For every partitioner a {@code FieldSchema} is created from the
 * partitioner's name, the Hive type of its partition type under
 * {@code schema}, and a generated comment that mentions its source name.
 *
 * @param strategy the partition strategy to translate
 * @param schema the entity Schema used to resolve partition types
 * @return a new list with one column per partitioner
 */
@SuppressWarnings("deprecation")
static List<FieldSchema> partitionColumns(PartitionStrategy strategy, Schema schema) {
  List<FieldSchema> result = Lists.newArrayList();
  for (FieldPartitioner<?, ?> partitioner
      : Accessor.getDefault().getFieldPartitioners(strategy)) {
    FieldSchema partitionColumn = new FieldSchema(
        partitioner.getName(),
        getHiveType(SchemaUtil.getPartitionType(partitioner, schema)),
        "Partition column derived from '" + partitioner.getSourceName() + "' column, " +
            "generated by Kite.");
    result.add(partitionColumn);
  }
  return result;
}
/**
 * Return the value of a {@code FieldPartitioner} field for this {@link Marker}.
 *
 * If the {@code Marker} has a value under the partitioner's name, that value
 * is returned via {@link Marker#getAs(java.lang.String, java.lang.Class)}
 * using the partitioner's type. Otherwise, if the {@code Marker} has a value
 * under the partitioner's source name, that value is retrieved with the
 * partitioner's source type and the partitioner is applied to it.
 *
 * @param fp a {@code FieldPartitioner}
 * @return the value of the field for this {@code Marker}, or null if the
 *         marker has neither the field name nor the source name
 * @since 0.9.0
 */
@Nullable
public <S, T> T valueFor(FieldPartitioner<S, T> fp) {
  String name = fp.getName();
  if (has(name)) {
    return getAs(name, fp.getType());
  }

  String sourceName = fp.getSourceName();
  if (has(sourceName)) {
    return fp.apply(getAs(sourceName, fp.getSourceType()));
  }

  return null;
}
@Override @SuppressWarnings("unchecked") public Iterator<MarkerRange> iterator() { // this should be part of PartitionStrategy final LinkedListMultimap<String, FieldPartitioner> partitioners = LinkedListMultimap.create(); for (FieldPartitioner fp : Accessor.getDefault().getFieldPartitioners(strategy)) { partitioners.put(fp.getSourceName(), fp); } Iterator<MarkerRange.Builder> current = start(new MarkerRange.Builder(cmp)); // primarily loop over sources because the logical constraints are there for (String source : partitioners.keySet()) { Predicate constraint = predicates.get(source); List<FieldPartitioner> fps = partitioners.get(source); FieldPartitioner first = fps.get(0); if (first instanceof CalendarFieldPartitioner) { current = TimeDomain.get(strategy, source) .addStackedIterator(constraint, current); } else if (constraint instanceof In) { current = add((In) constraint, fps, current); } else if (constraint instanceof Range) { current = add((Range) constraint, fps, current); } } return Iterators.transform(current, new ToMarkerRangeFunction()); }
public static AvroKeySchema mergeSpecificStringTypes( Class<? extends SpecificRecord> specificClass, AvroKeySchema keySchema) { Schema schemaField; try { schemaField = (Schema) specificClass.getField("SCHEMA$").get(null); } catch (IllegalArgumentException e) { throw new DatasetException(e); } catch (SecurityException e) { throw new DatasetException(e); } catch (IllegalAccessException e) { throw new DatasetException(e); } catch (NoSuchFieldException e) { throw new DatasetException(e); } // Ensure schema is limited to keySchema's fields. The class may have more // fields // in the case that the entity is being used as a key. List<Field> fields = Lists.newArrayList(); PartitionStrategy strategy = keySchema.getPartitionStrategy(); for (Schema.Field field : keySchema.getAvroSchema().getFields()) { String sourceName = Accessor.getDefault().getPartitioner(strategy, field.name()) .getSourceName(); fields.add(copy(schemaField.getField(sourceName))); } Schema schema = Schema.createRecord(keySchema.getAvroSchema().getName(), keySchema.getAvroSchema().getDoc(), keySchema.getAvroSchema() .getNamespace(), keySchema.getAvroSchema().isError()); schema.setFields(fields); return new AvroKeySchema(schema, keySchema.getPartitionStrategy()); }
/**
 * Evaluates a two-part expression (year and minute of "timestamp") and
 * checks the class, source name and name of each resulting partitioner,
 * then checks that the strategy converts back to the same expression.
 */
@Test
public void testMixedSubpartitionStrategy() {
  String expr = "[year(\"timestamp\", \"year\"), minute(\"timestamp\", \"minute\")]";
  PartitionStrategy strategy = new PartitionExpression(expr, true).evaluate();

  List<FieldPartitioner> partitioners =
      Accessor.getDefault().getFieldPartitioners(strategy);
  Assert.assertEquals(2, partitioners.size());

  // first partitioner: year of "timestamp"
  FieldPartitioner yearPartitioner = partitioners.get(0);
  Assert.assertEquals(YearFieldPartitioner.class, yearPartitioner.getClass());
  Assert.assertEquals("timestamp", yearPartitioner.getSourceName());
  Assert.assertEquals("year", yearPartitioner.getName());

  // second partitioner: minute of "timestamp"
  FieldPartitioner minutePartitioner = partitioners.get(1);
  Assert.assertEquals(MinuteFieldPartitioner.class, minutePartitioner.getClass());
  Assert.assertEquals("timestamp", minutePartitioner.getSourceName());
  Assert.assertEquals("minute", minutePartitioner.getName());

  // round trip back to the expression text
  Assert.assertEquals(expr, PartitionExpression.toExpression(strategy));
}
/** * Builds a Schema for the FieldPartitioner using the given Schema to * determine types not fixed by the FieldPartitioner. * * @param fp a FieldPartitioner * @param schema an entity Schema that will be partitioned * @return a Schema for the field partitioner */ public static Schema partitionFieldSchema(FieldPartitioner<?, ?> fp, Schema schema) { if (fp instanceof IdentityFieldPartitioner) { // copy the schema directly from the entity to preserve annotations return fieldSchema(schema, fp.getSourceName()); } else { Class<?> fieldType = getPartitionType(fp, schema); if (fieldType == Integer.class) { return Schema.create(Schema.Type.INT); } else if (fieldType == Long.class) { return Schema.create(Schema.Type.LONG); } else if (fieldType == String.class) { return Schema.create(Schema.Type.STRING); } else { throw new ValidationException( "Cannot encode partition " + fp.getName() + " with type " + fp.getSourceType() ); } } }
/**
 * Evaluates a single hash expression and checks the resulting
 * partitioner's class, source name, name and cardinality, then checks that
 * the strategy converts back to the same expression.
 */
@Test
public void testPartitionStrategy() {
  String expr = "hash(\"username\", \"username_part\", 2)";
  PartitionStrategy strategy = new PartitionExpression(expr, true).evaluate();

  List<FieldPartitioner> partitioners =
      Accessor.getDefault().getFieldPartitioners(strategy);
  Assert.assertEquals(1, partitioners.size());

  FieldPartitioner hashPartitioner = partitioners.get(0);
  Assert.assertEquals(HashFieldPartitioner.class, hashPartitioner.getClass());
  Assert.assertEquals("username", hashPartitioner.getSourceName());
  Assert.assertEquals("username_part", hashPartitioner.getName());
  Assert.assertEquals(2, hashPartitioner.getCardinality());

  // round trip back to the expression text
  Assert.assertEquals(expr, PartitionExpression.toExpression(strategy));
}