com.netflix.iceberg.types.Types$StructType java code examples

/**
 * Constructor invoked reflectively by Avro while manifest files are being read.
 * <p>
 * The Avro schema may be a projection, so this records, for every projected field, the position
 * of the field with the same id in the full data file type.
 *
 * @param avroSchema the Avro schema of the records being read
 * @throws IllegalArgumentException if a projected field id is not part of the full data file type
 */
public GenericDataFile(org.apache.avro.Schema avroSchema) {
 this.avroSchema = avroSchema;

 Types.StructType projected = AvroSchemaUtil.convert(avroSchema).asNestedType().asStructType();

 // partition type may be null if the field was not projected
 Type projectedPartition = projected.fieldType("partition");
 this.partitionType = projectedPartition == null
   ? EMPTY_STRUCT_TYPE
   : projectedPartition.asNestedType().asStructType();

 List<Types.NestedField> projectedFields = projected.fields();
 List<Types.NestedField> fullFields = DataFile.getType(partitionType).fields();

 // map each projected position to the position of the same field id in the full type
 int[] positions = new int[projectedFields.size()];
 for (int pos = 0; pos < positions.length; pos += 1) {
  Types.NestedField wanted = projectedFields.get(pos);
  int match = -1;
  for (int full = 0; full < fullFields.size(); full += 1) {
   if (wanted.fieldId() == fullFields.get(full).fieldId()) {
    // no early exit: the last position with a matching id is the one kept
    match = full;
   }
  }
  if (match < 0) {
   throw new IllegalArgumentException("Cannot find projected field: " + wanted);
  }
  positions[pos] = match;
 }

 this.fromProjectionPos = positions;
 this.partitionData = new PartitionData(partitionType);
}

/**
 * Rebuilds a struct so that every field carries the id of the same-named field in the source
 * struct. Field names, nullability and the supplied field types are kept as they are.
 *
 * @param struct the struct whose fields get new ids
 * @param fieldTypes the result types for the struct's fields, in field order
 * @return a struct with the same fields, using ids taken from the source struct
 * @throws NullPointerException if no source type has been set
 * @throws IllegalArgumentException if the source type is not a struct, or a field name cannot be
 *         found in the source struct
 */
@Override
public Type struct(Types.StructType struct, Iterable<Type> fieldTypes) {
 Preconditions.checkNotNull(sourceType, "Evaluation must start with a schema.");
 Preconditions.checkArgument(sourceType.isStructType(), "Not a struct: " + sourceType);
 Types.StructType sourceStruct = sourceType.asStructType();
 List<Types.NestedField> fields = struct.fields();
 int length = fields.size();
 List<Type> types = Lists.newArrayList(fieldTypes);
 List<Types.NestedField> newFields = Lists.newArrayListWithExpectedSize(length);
 for (int i = 0; i < length; i += 1) {
  Types.NestedField field = fields.get(i);
  Types.NestedField sourceField = sourceStruct.field(field.name());
  // a name missing from the source must fail as an IllegalArgumentException, not as an NPE
  Preconditions.checkArgument(sourceField != null,
    "Field %s not found in source schema", field.name());
  int sourceFieldId = sourceField.fieldId();
  if (field.isRequired()) {
   newFields.add(Types.NestedField.required(sourceFieldId, field.name(), types.get(i)));
  } else {
   newFields.add(Types.NestedField.optional(sourceFieldId, field.name(), types.get(i)));
  }
 }
 return Types.StructType.of(newFields);
}

/**
 * Visits one Avro record field and keeps it only when the current expected struct contains a
 * field with the same id.
 *
 * @param field the Avro field being visited
 * @param fieldResult supplies the visit result for the field's schema
 * @return a copy of the field, carrying the expected name and resulting schema when either
 *         differs, or null when the expected struct has no field with this id
 */
@Override
public Schema.Field field(Schema.Field field, Supplier<Schema> fieldResult) {
 Types.StructType expectedStruct = current.asNestedType().asStructType();
 // TODO: what if there are no ids?
 Types.NestedField expected = expectedStruct.field(AvroSchemaUtil.getFieldId(field));
 if (expected == null) {
  // if the field isn't present, it was not selected
  return null;
 }

 String expectedName = expected.name();
 // descend using the field's expected type; the struct is put back in the finally block
 this.current = expected.type();
 try {
  Schema result = fieldResult.get();
  boolean unchanged = result == field.schema() && expectedName.equals(field.name());
  if (unchanged) {
   // always copy because fields can't be reused
   return copyField(field, field.schema(), field.name());
  }
  // add an alias for the field
  return copyField(field, result, expectedName);
 } finally {
  this.current = expectedStruct;
 }
}

/**
 * Evaluates {@code ref <= lit} against the value counts and column statistics.
 *
 * @param ref bound reference to the column being tested
 * @param lit literal the column is compared with
 * @return {@code ROWS_CANNOT_MATCH} when the column has no value count, has no non-null values,
 *         or its minimum is greater than the literal; {@code ROWS_MIGHT_MATCH} otherwise
 */
@Override
public <T> Boolean ltEq(BoundReference<T> ref, Literal<T> lit) {
 Integer id = ref.fieldId();
 Preconditions.checkNotNull(struct.field(id),
   "Cannot filter by nested column: %s", schema.findField(id));

 if (valueCounts.get(id) == null) {
  // the column is not present and is all nulls
  return ROWS_CANNOT_MATCH;
 }

 Statistics<?> colStats = stats.get(id);
 if (colStats == null || colStats.isEmpty()) {
  // without usable statistics a match cannot be ruled out
  return ROWS_MIGHT_MATCH;
 }

 if (!colStats.hasNonNullValue()) {
  return ROWS_CANNOT_MATCH;
 }

 T lower = min(colStats, id);
 if (lit.comparator().compare(lower, lit.value()) > 0) {
  // even the smallest value is above the literal
  return ROWS_CANNOT_MATCH;
 }

 return ROWS_MIGHT_MATCH;
}

/**
 * Evaluates {@code ref > lit} against the value counts and column statistics.
 *
 * @param ref bound reference to the column being tested
 * @param lit literal the column is compared with
 * @return {@code ROWS_CANNOT_MATCH} when the column has no value count, has no non-null values,
 *         or its maximum is less than or equal to the literal; {@code ROWS_MIGHT_MATCH} otherwise
 */
@Override
public <T> Boolean gt(BoundReference<T> ref, Literal<T> lit) {
 Integer id = ref.fieldId();
 Preconditions.checkNotNull(struct.field(id),
   "Cannot filter by nested column: %s", schema.findField(id));

 if (valueCounts.get(id) == null) {
  // the column is not present and is all nulls
  return ROWS_CANNOT_MATCH;
 }

 Statistics<?> colStats = stats.get(id);
 if (colStats == null || colStats.isEmpty()) {
  // without usable statistics a match cannot be ruled out
  return ROWS_MIGHT_MATCH;
 }

 if (!colStats.hasNonNullValue()) {
  return ROWS_CANNOT_MATCH;
 }

 T upper = max(colStats, id);
 if (lit.comparator().compare(upper, lit.value()) <= 0) {
  // even the largest value does not exceed the literal
  return ROWS_CANNOT_MATCH;
 }

 return ROWS_MIGHT_MATCH;
}

/**
 * Strict evaluation of {@code ref == lit}: rows must match when Min == X == Max.
 * <p>
 * NOTE(review): this check reads only the lower and upper bounds; confirm separately how columns
 * containing nulls are expected to be treated.
 *
 * @param ref bound reference to the column being tested
 * @param lit literal the column is compared with
 * @return {@code ROWS_MUST_MATCH} when both bounds exist and each compares equal to the literal;
 *         {@code ROWS_MIGHT_NOT_MATCH} otherwise
 */
@Override
public <T> Boolean eq(BoundReference<T> ref, Literal<T> lit) {
 Integer id = ref.fieldId();
 Types.NestedField field = struct.field(id);
 Preconditions.checkNotNull(field, "Cannot filter by nested column: %s", schema.findField(id));

 boolean hasLower = lowerBounds != null && lowerBounds.containsKey(id);
 boolean hasBothBounds = hasLower && upperBounds != null && upperBounds.containsKey(id);
 if (!hasBothBounds) {
  return ROWS_MIGHT_NOT_MATCH;
 }

 T lower = Conversions.fromByteBuffer(field.type(), lowerBounds.get(id));
 if (lit.comparator().compare(lower, lit.value()) != 0) {
  return ROWS_MIGHT_NOT_MATCH;
 }

 T upper = Conversions.fromByteBuffer(field.type(), upperBounds.get(id));
 if (lit.comparator().compare(upper, lit.value()) != 0) {
  return ROWS_MIGHT_NOT_MATCH;
 }

 return ROWS_MUST_MATCH;
}

/**
 * Evaluates {@code ref >= lit} against the value counts and column statistics.
 *
 * @param ref bound reference to the column being tested
 * @param lit literal the column is compared with
 * @return {@code ROWS_CANNOT_MATCH} when the column has no value count, has no non-null values,
 *         or its maximum is less than the literal; {@code ROWS_MIGHT_MATCH} otherwise
 */
@Override
public <T> Boolean gtEq(BoundReference<T> ref, Literal<T> lit) {
 Integer id = ref.fieldId();
 Preconditions.checkNotNull(struct.field(id),
   "Cannot filter by nested column: %s", schema.findField(id));

 if (valueCounts.get(id) == null) {
  // the column is not present and is all nulls
  return ROWS_CANNOT_MATCH;
 }

 Statistics<?> colStats = stats.get(id);
 if (colStats == null || colStats.isEmpty()) {
  // without usable statistics a match cannot be ruled out
  return ROWS_MIGHT_MATCH;
 }

 if (!colStats.hasNonNullValue()) {
  return ROWS_CANNOT_MATCH;
 }

 T upper = max(colStats, id);
 if (lit.comparator().compare(upper, lit.value()) < 0) {
  // even the largest value is below the literal
  return ROWS_CANNOT_MATCH;
 }

 return ROWS_MIGHT_MATCH;
}

/**
 * Evaluates {@code ref < lit} against the value counts and column statistics.
 *
 * @param ref bound reference to the column being tested
 * @param lit literal the column is compared with
 * @return {@code ROWS_CANNOT_MATCH} when the column has no value count, has no non-null values,
 *         or its minimum is greater than or equal to the literal; {@code ROWS_MIGHT_MATCH}
 *         otherwise
 */
@Override
public <T> Boolean lt(BoundReference<T> ref, Literal<T> lit) {
 Integer id = ref.fieldId();
 Preconditions.checkNotNull(struct.field(id),
   "Cannot filter by nested column: %s", schema.findField(id));

 if (valueCounts.get(id) == null) {
  // the column is not present and is all nulls
  return ROWS_CANNOT_MATCH;
 }

 Statistics<?> colStats = stats.get(id);
 if (colStats == null || colStats.isEmpty()) {
  // without usable statistics a match cannot be ruled out
  return ROWS_MIGHT_MATCH;
 }

 if (!colStats.hasNonNullValue()) {
  return ROWS_CANNOT_MATCH;
 }

 T lower = min(colStats, id);
 if (lit.comparator().compare(lower, lit.value()) >= 0) {
  // even the smallest value is not below the literal
  return ROWS_CANNOT_MATCH;
 }

 return ROWS_MIGHT_MATCH;
}

/**
 * Strict evaluation of {@code ref != lit}: rows must match when X < Min or Max < X, because the
 * literal is then outside the range of stored values.
 *
 * @param ref bound reference to the column being tested
 * @param lit literal the column is compared with
 * @return {@code ROWS_MUST_MATCH} when the lower bound is greater than the literal or the upper
 *         bound is less than it; {@code ROWS_MIGHT_NOT_MATCH} otherwise
 */
@Override
public <T> Boolean notEq(BoundReference<T> ref, Literal<T> lit) {
 Integer id = ref.fieldId();
 Types.NestedField field = struct.field(id);
 Preconditions.checkNotNull(field, "Cannot filter by nested column: %s", schema.findField(id));

 boolean hasLower = lowerBounds != null && lowerBounds.containsKey(id);
 if (hasLower) {
  T lower = Conversions.fromByteBuffer(field.type(), lowerBounds.get(id));
  boolean literalBelowRange = lit.comparator().compare(lower, lit.value()) > 0;
  if (literalBelowRange) {
   return ROWS_MUST_MATCH;
  }
 }

 boolean hasUpper = upperBounds != null && upperBounds.containsKey(id);
 if (hasUpper) {
  T upper = Conversions.fromByteBuffer(field.type(), upperBounds.get(id));
  boolean literalAboveRange = lit.comparator().compare(upper, lit.value()) < 0;
  if (literalAboveRange) {
   return ROWS_MUST_MATCH;
  }
 }

 return ROWS_MIGHT_NOT_MATCH;
}

/**
 * Inclusive evaluation of {@code ref == lit} against the lower and upper bounds.
 *
 * @param ref bound reference to the column being tested
 * @param lit literal the column is compared with
 * @return {@code ROWS_CANNOT_MATCH} when the lower bound is greater than the literal or the upper
 *         bound is less than it; {@code ROWS_MIGHT_MATCH} otherwise, including when a bound is
 *         missing
 */
@Override
public <T> Boolean eq(BoundReference<T> ref, Literal<T> lit) {
 Integer id = ref.fieldId();
 Types.NestedField field = struct.field(id);
 Preconditions.checkNotNull(field, "Cannot filter by nested column: %s", schema.findField(id));

 boolean hasLower = lowerBounds != null && lowerBounds.containsKey(id);
 if (hasLower) {
  T lower = Conversions.fromByteBuffer(field.type(), lowerBounds.get(id));
  boolean literalBelowRange = lit.comparator().compare(lower, lit.value()) > 0;
  if (literalBelowRange) {
   return ROWS_CANNOT_MATCH;
  }
 }

 boolean hasUpper = upperBounds != null && upperBounds.containsKey(id);
 if (hasUpper) {
  T upper = Conversions.fromByteBuffer(field.type(), upperBounds.get(id));
  boolean literalAboveRange = lit.comparator().compare(upper, lit.value()) < 0;
  if (literalAboveRange) {
   return ROWS_CANNOT_MATCH;
  }
 }

 return ROWS_MIGHT_MATCH;
}

/**
 * Converts an Avro record into a struct type, one struct field per record field.
 * <p>
 * A field whose Avro schema is an option schema becomes optional; every other field becomes
 * required. When the record is the root, the id counter is reset to 0 first.
 *
 * @param record the Avro record schema
 * @param names not read by this method
 * @param fieldTypes the converted types of the record's fields, in field order
 * @return the struct type built from the record's fields
 */
@Override
public Type record(Schema record, List<String> names, List<Type> fieldTypes) {
 if (root == record) {
  this.nextId = 0;
 }

 List<Schema.Field> avroFields = record.getFields();
 List<Types.NestedField> structFields = Lists.newArrayListWithExpectedSize(avroFields.size());

 int index = 0;
 for (Schema.Field avroField : avroFields) {
  Type converted = fieldTypes.get(index);
  index += 1;

  int id = getId(avroField);
  String name = avroField.name();
  Types.NestedField structField = AvroSchemaUtil.isOptionSchema(avroField.schema())
    ? Types.NestedField.optional(id, name, converted)
    : Types.NestedField.required(id, name, converted);
  structFields.add(structField);
 }

 return Types.StructType.of(structFields);
}

/**
 * Evaluates {@code ref IS NOT NULL} against the value counts and column statistics.
 * <p>
 * There is no need to check whether the field is required because binding evaluates that case.
 *
 * @param ref bound reference to the column being tested
 * @return {@code ROWS_CANNOT_MATCH} when the column has no value count or its null count equals
 *         its value count; {@code ROWS_MIGHT_MATCH} otherwise
 */
@Override
public <T> Boolean notNull(BoundReference<T> ref) {
 Integer id = ref.fieldId();
 Preconditions.checkNotNull(struct.field(id),
   "Cannot filter by nested column: %s", schema.findField(id));

 Long valueCount = valueCounts.get(id);
 if (valueCount == null) {
  // the column is not present and is all nulls
  return ROWS_CANNOT_MATCH;
 }

 Statistics<?> colStats = stats.get(id);
 if (colStats != null) {
  long nonNullCount = valueCount - colStats.getNumNulls();
  if (nonNullCount == 0) {
   // (num nulls == value count) => all values are null => no non-null values
   return ROWS_CANNOT_MATCH;
  }
 }

 return ROWS_MIGHT_MATCH;
}

/**
 * Evaluates {@code ref < lit} against the column's dictionary.
 *
 * @param ref bound reference to the column being tested
 * @param lit literal the dictionary items are compared with
 * @return {@code ROWS_MIGHT_MATCH} when {@code isFallback} has no entry for the column or reports
 *         true, or when some dictionary item is less than the literal;
 *         {@code ROWS_CANNOT_MATCH} otherwise
 */
@Override
public <T> Boolean lt(BoundReference<T> ref, Literal<T> lit) {
 Integer id = ref.fieldId();
 Preconditions.checkNotNull(struct.field(id),
   "Cannot filter by nested column: %s", schema.findField(id));

 // the dictionary is only consulted when the fallback flag is explicitly false
 boolean dictionaryOnly = Boolean.FALSE.equals(isFallback.get(id));
 if (!dictionaryOnly) {
  return ROWS_MIGHT_MATCH;
 }

 // if any item in the dictionary matches the predicate, then at least one row does
 Set<T> items = dict(id, lit.comparator());
 for (T candidate : items) {
  if (lit.comparator().compare(candidate, lit.value()) < 0) {
   return ROWS_MIGHT_MATCH;
  }
 }

 return ROWS_CANNOT_MATCH;
}

/**
 * Evaluates {@code ref IS NULL} against the value counts and column statistics.
 * <p>
 * There is no need to check whether the field is required because binding evaluates that case.
 *
 * @param ref bound reference to the column being tested
 * @return {@code ROWS_CANNOT_MATCH} when non-empty statistics report zero nulls;
 *         {@code ROWS_MIGHT_MATCH} otherwise, including when the column has no value count
 */
@Override
public <T> Boolean isNull(BoundReference<T> ref) {
 Integer id = ref.fieldId();
 Preconditions.checkNotNull(struct.field(id),
   "Cannot filter by nested column: %s", schema.findField(id));

 if (valueCounts.get(id) == null) {
  // the column is not present and is all nulls
  return ROWS_MIGHT_MATCH;
 }

 Statistics<?> colStats = stats.get(id);
 if (colStats == null || colStats.isEmpty()) {
  // without usable statistics a match cannot be ruled out
  return ROWS_MIGHT_MATCH;
 }

 if (colStats.getNumNulls() == 0) {
  // there are stats and no values are null => all values are non-null
  return ROWS_CANNOT_MATCH;
 }

 return ROWS_MIGHT_MATCH;
}

/**
 * Evaluates {@code ref >= lit} against the column's dictionary.
 *
 * @param ref bound reference to the column being tested
 * @param lit literal the dictionary items are compared with
 * @return {@code ROWS_MIGHT_MATCH} when {@code isFallback} has no entry for the column or reports
 *         true, or when some dictionary item is greater than or equal to the literal;
 *         {@code ROWS_CANNOT_MATCH} otherwise
 */
@Override
public <T> Boolean gtEq(BoundReference<T> ref, Literal<T> lit) {
 Integer id = ref.fieldId();
 Preconditions.checkNotNull(struct.field(id),
   "Cannot filter by nested column: %s", schema.findField(id));

 // the dictionary is only consulted when the fallback flag is explicitly false
 boolean dictionaryOnly = Boolean.FALSE.equals(isFallback.get(id));
 if (!dictionaryOnly) {
  return ROWS_MIGHT_MATCH;
 }

 // if any item in the dictionary matches the predicate, then at least one row does
 Set<T> items = dict(id, lit.comparator());
 for (T candidate : items) {
  if (lit.comparator().compare(candidate, lit.value()) >= 0) {
   return ROWS_MIGHT_MATCH;
  }
 }

 return ROWS_CANNOT_MATCH;
}

/**
 * Evaluates {@code ref > lit} against the column's dictionary.
 *
 * @param ref bound reference to the column being tested
 * @param lit literal the dictionary items are compared with
 * @return {@code ROWS_MIGHT_MATCH} when {@code isFallback} has no entry for the column or reports
 *         true, or when some dictionary item is greater than the literal;
 *         {@code ROWS_CANNOT_MATCH} otherwise
 */
@Override
public <T> Boolean gt(BoundReference<T> ref, Literal<T> lit) {
 Integer id = ref.fieldId();
 Preconditions.checkNotNull(struct.field(id),
   "Cannot filter by nested column: %s", schema.findField(id));

 // the dictionary is only consulted when the fallback flag is explicitly false
 boolean dictionaryOnly = Boolean.FALSE.equals(isFallback.get(id));
 if (!dictionaryOnly) {
  return ROWS_MIGHT_MATCH;
 }

 // if any item in the dictionary matches the predicate, then at least one row does
 Set<T> items = dict(id, lit.comparator());
 for (T candidate : items) {
  if (lit.comparator().compare(candidate, lit.value()) > 0) {
   return ROWS_MIGHT_MATCH;
  }
 }

 return ROWS_CANNOT_MATCH;
}

/**
 * Evaluates {@code ref <= lit} against the column's dictionary.
 *
 * @param ref bound reference to the column being tested
 * @param lit literal the dictionary items are compared with
 * @return {@code ROWS_MIGHT_MATCH} when {@code isFallback} has no entry for the column or reports
 *         true, or when some dictionary item is less than or equal to the literal;
 *         {@code ROWS_CANNOT_MATCH} otherwise
 */
@Override
public <T> Boolean ltEq(BoundReference<T> ref, Literal<T> lit) {
 Integer id = ref.fieldId();
 Preconditions.checkNotNull(struct.field(id),
   "Cannot filter by nested column: %s", schema.findField(id));

 // the dictionary is only consulted when the fallback flag is explicitly false
 boolean dictionaryOnly = Boolean.FALSE.equals(isFallback.get(id));
 if (!dictionaryOnly) {
  return ROWS_MIGHT_MATCH;
 }

 // if any item in the dictionary matches the predicate, then at least one row does
 Set<T> items = dict(id, lit.comparator());
 for (T candidate : items) {
  if (lit.comparator().compare(candidate, lit.value()) <= 0) {
   return ROWS_MIGHT_MATCH;
  }
 }

 return ROWS_CANNOT_MATCH;
}

/**
 * Checks that a map from a list of ints to a two-field struct converts to the expected Avro
 * schema, and that the Avro schema converts back to the same map type.
 */
@Test
public void testMapOfListToStructs() {
 // expected type: key is list<int>, value is struct<a: required int, b: optional int>
 Type keyType = Types.ListType.ofRequired(35, Types.IntegerType.get());
 Type valueType = Types.StructType.of(
   required(36, "a", Types.IntegerType.get()),
   optional(37, "b", Types.IntegerType.get()));
 Type map = Types.MapType.ofRequired(33, 34, keyType, valueType);

 // the equivalent Avro schema, built with the same ids
 Schema keySchema = addElementId(35, Schema.createArray(Schema.create(Schema.Type.INT)));
 Schema valueSchema = record("r34",
   requiredField(36, "a", Schema.create(Schema.Type.INT)),
   optionalField(37, "b", Schema.create(Schema.Type.INT)));
 Schema schema = AvroSchemaUtil.createMap(33, keySchema, 34, valueSchema);

 Assert.assertEquals("Avro schema to map",
   map, AvroSchemaUtil.convert(schema));
 Assert.assertEquals("Map to Avro schema",
   schema, AvroSchemaUtil.convert(map));
}

/**
 * Runs {@code convertToPigSchema} on a schema that nests structs, lists and maps inside each
 * other, together with the Pig schema string expected for it.
 */
@Test
public void complexNested() throws IOException {
 Schema nested = new Schema(
   // t: struct holding a list of (i, s) structs
   optional(1, "t", StructType.of(
     optional(2, "b", ListType.ofOptional(3, StructType.of(
       optional(4, "i", IntegerType.get()),
       optional(5, "s", StringType.get())
     )))
   )),
   // m1: map from string to a struct holding a binary list and an inner map
   optional(6, "m1", MapType.ofOptional(7, 8, StringType.get(), StructType.of(
     optional(9, "b", ListType.ofOptional(10, BinaryType.get())),
     optional(11, "m2", MapType.ofOptional(12, 13, StringType.get(), IntegerType.get()))
   ))),
   // b1: list of maps from string to float lists
   optional(14, "b1", ListType.ofOptional(15,
     MapType.ofOptional(16, 17, StringType.get(),
       ListType.ofOptional(18, FloatType.get()))))
 );
 String expectedPigSchema =
   "t:(b:{(i:int,s:chararray)}),m1:[(b:{(bytearray)},m2:[int])],b1:{([{(float)}])}";

 convertToPigSchema(nested, expectedPigSchema, "");
}

/**
 * Reassigns ids in a schema from another schema.
 * <p>
 * Ids are determined by field names. If a field in the schema cannot be found in the source
 * schema, this will throw IllegalArgumentException.
 * <p>
 * This will not alter a schema's structure, nullability, or types.
 *
 * @param schema the schema to have ids reassigned
 * @param idSourceSchema the schema from which field ids will be used
 * @return a structurally identical schema with field ids matching the source schema
 * @throws IllegalArgumentException if a field cannot be found (by name) in the source schema
 */
public static Schema reassignIds(Schema schema, Schema idSourceSchema) {
 // the visitor resolves every field name against the id source schema
 ReassignIds reassigner = new ReassignIds(idSourceSchema);
 Types.StructType reassigned = visit(schema, reassigner).asStructType();
 return new Schema(reassigned.fields());
}

Most used methods

Popular in Java

Updating database using SQL prepared statement
getApplicationContext (Context)
addToBackStack (FragmentTransaction)
onRequestPermissionsResult (Fragment)
Iterator (java.util)
An iterator over a sequence of objects, such as a collection. If a collection has been changed since
Semaphore (java.util.concurrent)
A counting semaphore. Conceptually, a semaphore maintains a set of permits. Each #acquire blocks if
Manifest (java.util.jar)
The Manifest class is used to obtain attribute information for a JarFile and its entries.
Logger (org.apache.log4j)
This is the central class in the log4j package. Most logging operations, except configuration, are d
JButton (javax.swing)
Project (org.apache.tools.ant)
Central representation of an Ant project. This class defines an Ant project with all of its targets,
Top Vim plugins

How to use Types$StructType in com.netflix.iceberg.types

Best Java code snippets using com.netflix.iceberg.types.Types$StructType (Showing top 20 results out of 315)

How to use
Types$StructType
in
com.netflix.iceberg.types