/**
 * Builds the type information for a tuple whose fields are all basic types.
 *
 * @param basicTypes The classes of the tuple fields, in field order. Every class must be one
 *                   for which {@link BasicTypeInfo#getInfoFor(Class)} yields a type information.
 * @param <X> The tuple type described by the returned type information.
 * @return The tuple type information holding one field type per given class.
 * @throws IllegalArgumentException If no classes are given, or if a class is null or not a basic type.
 */
public static <X extends Tuple> TupleTypeInfo<X> getBasicTupleTypeInfo(Class<?>... basicTypes) {
    if (basicTypes == null || basicTypes.length == 0) {
        throw new IllegalArgumentException();
    }

    final TypeInformation<?>[] fieldInfos = new TypeInformation<?>[basicTypes.length];

    int pos = 0;
    for (Class<?> fieldClass : basicTypes) {
        if (fieldClass == null) {
            throw new IllegalArgumentException("Type at position " + pos + " is null.");
        }

        final TypeInformation<?> fieldInfo = BasicTypeInfo.getInfoFor(fieldClass);
        if (fieldInfo == null) {
            throw new IllegalArgumentException("Type at position " + pos + " is not a basic type.");
        }

        fieldInfos[pos] = fieldInfo;
        pos++;
    }

    // the caller chooses X; the cast cannot be checked at runtime
    @SuppressWarnings("unchecked")
    TupleTypeInfo<X> result = (TupleTypeInfo<X>) new TupleTypeInfo<Tuple>(fieldInfos);
    return result;
}
/**
 * Registers an additional aggregation on this operator.
 *
 * @param function The aggregation to apply; must not be null.
 * @param field The position of the tuple field to aggregate.
 * @return This operator, to allow chaining further calls.
 * @throws IllegalArgumentException If the field position lies outside the tuple's arity.
 */
public AggregateOperator<IN> and(Aggregations function, int field) {
    Validate.notNull(function);

    final TupleTypeInfo<?> tupleType = (TupleTypeInfo<?>) getType();

    final boolean inRange = field >= 0 && field < tupleType.getArity();
    if (!inRange) {
        throw new IllegalArgumentException("Aggregation field position is out of range.");
    }

    // the aggregation function is created for the class of the selected field
    final AggregationFunctionFactory functionFactory = function.getFactory();
    final AggregationFunction<?> aggregator =
            functionFactory.createAggregationFunction(tupleType.getTypeAt(field).getTypeClass());

    this.aggregationFunctions.add(aggregator);
    this.fields.add(field);

    return this;
}
/**
 * Creates a projecting join over the two inputs.
 * <p>
 * Everything is delegated to the superclass constructor. The join function handed over is a
 * {@code ProjectJoinFunction} built from {@code fields}, {@code isFromFirst}, and a tuple
 * instance obtained via {@code returnType.createSerializer().createInstance()}.
 *
 * @param input1 The first join input.
 * @param input2 The second join input.
 * @param keys1 The keys of the first input.
 * @param keys2 The keys of the second input.
 * @param hint The join hint passed on to the superclass.
 * @param fields The projected field positions (presumably positions within the respective input tuple; confirm against ProjectJoinFunction).
 * @param isFromFirst Flags passed to the ProjectJoinFunction (presumably whether each projected field is taken from the first input; confirm).
 * @param returnType The type information of the projected result tuples.
 */
protected ProjectJoin(DataSet<I1> input1, DataSet<I2> input2, Keys<I1> keys1, Keys<I2> keys2, JoinHint hint, int[] fields, boolean[] isFromFirst, TupleTypeInfo<OUT> returnType) { super(input1, input2, keys1, keys2, new ProjectJoinFunction<I1, I2, OUT>(fields, isFromFirst, returnType.createSerializer().createInstance()), returnType, hint); } }
/**
 * Creates the serializer for the tuple type by creating one serializer per field type.
 *
 * @return A tuple serializer for the tuple class, backed by the field serializers.
 */
@Override
public TupleSerializer<T> createSerializer() {
    final TypeSerializer<?>[] serializers = new TypeSerializer<?>[getArity()];

    int pos = 0;
    while (pos < types.length) {
        serializers[pos] = types[pos].createSerializer();
        pos++;
    }

    return new TupleSerializer<T>(getTypeClass(), serializers);
}
// The key type is read from field 0 of the initial vertices' tuple type and then combined
// with messageType into the type information for (key, message) 2-tuples.
TypeInformation<VertexKey> keyType = ((TupleTypeInfo<?>) initialVertices.getType()).getTypeAt(0); TypeInformation<Tuple2<VertexKey, Message>> messageTypeInfo = new TupleTypeInfo<Tuple2<VertexKey,Message>>(keyType, messageType);
/**
 * Looks up the type information of the selected fields in the input tuple type and verifies
 * that each field's class is the class given for that position.
 *
 * @param fields The positions of the selected fields in the input tuple.
 * @param givenTypes The expected classes of the selected fields, in the same order as {@code fields}.
 * @param inType The type information of the input; it is cast to a tuple type.
 * @return The type information of the selected fields, in the order of {@code fields}.
 * @throws IllegalArgumentException If a field's class differs from the given class.
 */
private TypeInformation<?>[] extractFieldTypes(int[] fields, Class<?>[] givenTypes, TypeInformation<?> inType) {
    final TupleTypeInfo<?> tupleType = (TupleTypeInfo<?>) inType;
    final TypeInformation<?>[] selectedTypes = new TypeInformation[fields.length];

    int pos = 0;
    for (int fieldPos : fields) {
        final TypeInformation<?> fieldType = tupleType.getTypeAt(fieldPos);

        // classes are compared by identity, so the given class must match exactly
        if (fieldType.getTypeClass() != givenTypes[pos]) {
            throw new IllegalArgumentException("Given types do not match types of input data set.");
        }

        selectedTypes[pos] = fieldType;
        pos++;
    }

    return selectedTypes;
}
/**
 * Declares the type of the single CSV field and parses the CSV data into 1-tuples
 * with a field of that type.
 * An overload of this method exists for each supported tuple length, so that data sets
 * created through CSV parsing are type safe.
 *
 * @param type0 The type of CSV field 0 and the type of field 0 in the returned tuple type.
 * @return The {@link eu.stratosphere.api.java.DataSet} representing the parsed CSV data.
 */
public <T0> DataSource<Tuple1<T0>> types(Class<T0> type0) {
    final TupleTypeInfo<Tuple1<T0>> tupleInfo = TupleTypeInfo.<Tuple1<T0>>getBasicTupleTypeInfo(type0);

    final CsvInputFormat<Tuple1<T0>> csvFormat = new CsvInputFormat<Tuple1<T0>>(path);
    configureInputFormat(csvFormat, type0);

    return new DataSource<Tuple1<T0>>(executionContext, csvFormat, tupleInfo);
}
private static int[] makeFields(int[] fields, TupleTypeInfo<?> type) { int inLength = type.getArity(); // null parameter means all fields are considered if (fields == null || fields.length == 0) { fields = new int[inLength]; for (int i = 0; i < inLength; i++) { fields[i] = i; } return fields; } else { return rangeCheckAndOrderFields(fields, inLength-1); } }
// Only the first entry of orders and of types is used to build the comparator here.
return createLeadingFieldComparator(orders[0], types[0]);
/**
 * Searches {@code type} for the given type variable and returns the part of {@code corrInfo}
 * that sits at the matching position.
 * <p>
 * A type variable matches if both its name and its generic declaration equal those of
 * {@code typeVar}. Parameterized tuple types are searched recursively, argument by argument,
 * pairing argument {@code i} with field {@code i} of the tuple type information.
 *
 * @param typeVar The type variable to look for.
 * @param type The type to search in.
 * @param corrInfo The type information corresponding to {@code type}.
 * @return The type information corresponding to the type variable, or null if it was not found.
 */
private static TypeInformation<?> findCorrespondingInfo(TypeVariable<?> typeVar, Type type, TypeInformation<?> corrInfo) {
    if (type instanceof TypeVariable) {
        final TypeVariable<?> candidate = (TypeVariable<?>) type;
        final boolean matches = candidate.getName().equals(typeVar.getName())
                && candidate.getGenericDeclaration().equals(typeVar.getGenericDeclaration());
        return matches ? corrInfo : null;
    }

    if (!(type instanceof ParameterizedType)) {
        return null;
    }

    final ParameterizedType parameterized = (ParameterizedType) type;
    if (!Tuple.class.isAssignableFrom((Class<?>) parameterized.getRawType())) {
        return null;
    }

    final Type[] typeArgs = parameterized.getActualTypeArguments();
    for (int pos = 0; pos < typeArgs.length; pos++) {
        // the cast stays inside the loop so it only happens when there are type arguments
        final TypeInformation<?> fieldInfo = ((TupleTypeInfo<?>) corrInfo).getTypeAt(pos);
        final TypeInformation<?> found = findCorrespondingInfo(typeVar, typeArgs[pos], fieldInfo);
        if (found != null) {
            return found;
        }
    }

    return null;
}
/**
 * Declares the types of the two CSV fields and parses the CSV data into 2-tuples
 * with fields of those types.
 * An overload of this method exists for each supported tuple length, so that data sets
 * created through CSV parsing are type safe.
 *
 * @param type0 The type of CSV field 0 and the type of field 0 in the returned tuple type.
 * @param type1 The type of CSV field 1 and the type of field 1 in the returned tuple type.
 * @return The {@link eu.stratosphere.api.java.DataSet} representing the parsed CSV data.
 */
public <T0, T1> DataSource<Tuple2<T0, T1>> types(Class<T0> type0, Class<T1> type1) {
    final TupleTypeInfo<Tuple2<T0, T1>> tupleInfo =
            TupleTypeInfo.<Tuple2<T0, T1>>getBasicTupleTypeInfo(type0, type1);

    final CsvInputFormat<Tuple2<T0, T1>> csvFormat = new CsvInputFormat<Tuple2<T0, T1>>(path);
    configureInputFormat(csvFormat, type0, type1);

    return new DataSource<Tuple2<T0, T1>>(executionContext, csvFormat, tupleInfo);
}
public Projection(DataSet<T> ds, int[] fieldIndexes) { if(!(ds.getType() instanceof TupleTypeInfo)) { throw new UnsupportedOperationException("project() can only be applied to DataSets of Tuples."); } if(fieldIndexes.length == 0) { throw new IllegalArgumentException("project() needs to select at least one (1) field."); } else if(fieldIndexes.length > 22) { throw new IllegalArgumentException("project() may select only up to twenty-two (22) fields."); } int maxFieldIndex = ((TupleTypeInfo<?>)ds.getType()).getArity(); for(int i=0; i<fieldIndexes.length; i++) { if(fieldIndexes[i] > maxFieldIndex - 1) { throw new IndexOutOfBoundsException("Provided field index is out of bounds of input tuple."); } } this.ds = ds; this.fieldIndexes = fieldIndexes; }
/**
 * Projects each pair of crossed elements to a {@link Tuple} made of the previously selected fields.
 * The classes of the fields of the resulting tuples must be supplied.
 *
 * @param type0 The class of field '0' of the result tuples.
 * @return The projected data set.
 * @throws IllegalArgumentException If the number of given classes differs from the number of
 *                                  selected fields.
 *
 * @see Tuple
 * @see DataSet
 */
public <T0> ProjectCross<I1, I2, Tuple1<T0>> types(Class<T0> type0) {
    final Class<?>[] fieldClasses = {type0};

    if (fieldClasses.length != this.fieldIndexes.length) {
        throw new IllegalArgumentException("Numbers of projected fields and types do not match.");
    }

    final TypeInformation<?>[] fieldTypes = extractFieldTypes(this.fieldIndexes, fieldClasses);
    final TupleTypeInfo<Tuple1<T0>> resultType = new TupleTypeInfo<Tuple1<T0>>(fieldTypes);

    return new ProjectCross<I1, I2, Tuple1<T0>>(
            this.ds1, this.ds2, this.fieldIndexes, this.isFieldInFirst, resultType);
}
/**
 * Configures the reader to parse the CSV data into the given tuple type. The type must be a
 * subclass of {@link Tuple}. The field types are derived from the type class, which therefore
 * has to specify all generic field types of the tuple.
 *
 * @param targetType The class of the target type, needs to be a subclass of Tuple.
 * @return The DataSet representing the parsed CSV data.
 * @throws IllegalArgumentException If the target type is not a subclass of Tuple.
 */
public <T extends Tuple> DataSource<T> tupleType(Class<T> targetType) {
    Validate.notNull(targetType, "The target type class must not be null.");
    if (!Tuple.class.isAssignableFrom(targetType)) {
        throw new IllegalArgumentException("The target type must be a subclass of " + Tuple.class.getName());
    }

    @SuppressWarnings("unchecked")
    TupleTypeInfo<T> tupleInfo = (TupleTypeInfo<T>) TypeExtractor.createTypeInfo(targetType);
    final CsvInputFormat<T> csvFormat = new CsvInputFormat<T>(path);

    // the input format is configured with the class of every tuple field
    final int arity = tupleInfo.getArity();
    final Class<?>[] fieldClasses = new Class<?>[arity];
    for (int pos = 0; pos < arity; pos++) {
        fieldClasses[pos] = tupleInfo.getTypeAt(pos).getTypeClass();
    }
    configureInputFormat(csvFormat, fieldClasses);

    return new DataSource<T>(executionContext, csvFormat, tupleInfo);
}
public FieldPositionKeys(int[] groupingFields, TypeInformation<T> type, boolean allowEmpty) { if (!type.isTupleType()) { throw new InvalidProgramException("Specifying keys via field positions is only valid for tuple data types"); } if (!allowEmpty && (groupingFields == null || groupingFields.length == 0)) { throw new IllegalArgumentException("The grouping fields must not be empty."); } TupleTypeInfo<?> tupleType = (TupleTypeInfo<?>)type; this.groupingFields = makeFields(groupingFields, (TupleTypeInfo<?>) type); types = new TypeInformation[this.groupingFields.length]; for(int i = 0; i < this.groupingFields.length; i++) { types[i] = tupleType.getTypeAt(this.groupingFields[i]); } }
/**
 * Declares the types of the three CSV fields and parses the CSV data into 3-tuples
 * with fields of those types.
 * An overload of this method exists for each supported tuple length, so that data sets
 * created through CSV parsing are type safe.
 *
 * @param type0 The type of CSV field 0 and the type of field 0 in the returned tuple type.
 * @param type1 The type of CSV field 1 and the type of field 1 in the returned tuple type.
 * @param type2 The type of CSV field 2 and the type of field 2 in the returned tuple type.
 * @return The {@link eu.stratosphere.api.java.DataSet} representing the parsed CSV data.
 */
public <T0, T1, T2> DataSource<Tuple3<T0, T1, T2>> types(Class<T0> type0, Class<T1> type1, Class<T2> type2) {
    final TupleTypeInfo<Tuple3<T0, T1, T2>> tupleInfo =
            TupleTypeInfo.<Tuple3<T0, T1, T2>>getBasicTupleTypeInfo(type0, type1, type2);

    final CsvInputFormat<Tuple3<T0, T1, T2>> csvFormat = new CsvInputFormat<Tuple3<T0, T1, T2>>(path);
    configureInputFormat(csvFormat, type0, type1, type2);

    return new DataSource<Tuple3<T0, T1, T2>>(executionContext, csvFormat, tupleInfo);
}
/**
 * Creates a projecting cross over the two inputs.
 * <p>
 * Everything is delegated to the superclass constructor. The cross function handed over is a
 * {@code ProjectCrossFunction} built from {@code fields}, {@code isFromFirst}, and a tuple
 * instance obtained via {@code returnType.createSerializer().createInstance()}.
 *
 * @param input1 The first cross input.
 * @param input2 The second cross input.
 * @param fields The projected field positions (presumably positions within the respective input tuple; confirm against ProjectCrossFunction).
 * @param isFromFirst Flags passed to the ProjectCrossFunction (presumably whether each projected field is taken from the first input; confirm).
 * @param returnType The type information of the projected result tuples.
 */
protected ProjectCross(DataSet<I1> input1, DataSet<I2> input2, int[] fields, boolean[] isFromFirst, TupleTypeInfo<OUT> returnType) { super(input1, input2, new ProjectCrossFunction<I1, I2, OUT>(fields, isFromFirst, returnType.createSerializer().createInstance()), returnType); } }
/**
 * Projects each pair of joined elements to a {@link Tuple} made of the previously selected fields.
 * The classes of the fields of the resulting tuples must be supplied.
 *
 * @param type0 The class of field '0' of the result tuples.
 * @return The projected data set.
 * @throws IllegalArgumentException If the number of given classes differs from the number of
 *                                  selected fields.
 *
 * @see Tuple
 * @see DataSet
 */
public <T0> ProjectJoin<I1, I2, Tuple1<T0>> types(Class<T0> type0) {
    final Class<?>[] fieldClasses = {type0};

    if (fieldClasses.length != this.fieldIndexes.length) {
        throw new IllegalArgumentException("Numbers of projected fields and types do not match.");
    }

    final TypeInformation<?>[] fieldTypes = extractFieldTypes(this.fieldIndexes, fieldClasses);
    final TupleTypeInfo<Tuple1<T0>> resultType = new TupleTypeInfo<Tuple1<T0>>(fieldTypes);

    return new ProjectJoin<I1, I2, Tuple1<T0>>(
            this.ds1, this.ds2, this.keys1, this.keys2, this.hint,
            this.fieldIndexes, this.isFieldInFirst, resultType);
}
/**
 * Creates a non-grouped aggregation of one tuple field of the input data set.
 *
 * @param input The data set to aggregate; must not be null and must be of a tuple type.
 * @param function The aggregation to apply; must not be null.
 * @param field The position of the tuple field to aggregate.
 * @throws InvalidProgramException If the input is not of a tuple type.
 * @throws IllegalArgumentException If the field position lies outside the tuple's arity.
 */
public AggregateOperator(DataSet<IN> input, Aggregations function, int field) {
    super(Validate.notNull(input), input.getType());
    Validate.notNull(function);

    if (!input.getType().isTupleType()) {
        throw new InvalidProgramException("Aggregating on field positions is only possible on tuple data types.");
    }

    final TupleTypeInfo<?> tupleType = (TupleTypeInfo<?>) input.getType();

    final boolean inRange = field >= 0 && field < tupleType.getArity();
    if (!inRange) {
        throw new IllegalArgumentException("Aggregation field position is out of range.");
    }

    // the aggregation function is created for the class of the selected field
    final AggregationFunctionFactory functionFactory = function.getFactory();
    final AggregationFunction<?> aggregator =
            functionFactory.createAggregationFunction(tupleType.getTypeAt(field).getTypeClass());

    // first aggregation directly on a regular data set, hence there is no grouping
    this.aggregationFunctions.add(aggregator);
    this.fields.add(field);
    this.grouping = null;
}