org.apache.flink.api.java.operators.DataSource.<init> Java code examples

/**
 * Builds a data source that reads from the given splittable iterator through a
 * {@link ParallelIteratorInputFormat}.
 *
 * @param iterator The splittable iterator that supplies the elements.
 * @param type The type information describing the produced elements.
 * @param callLocationName The call-location label handed to the created data source.
 * @return A data source over the iterator's elements.
 */
private <X> DataSource<X> fromParallelCollection(
    SplittableIterator<X> iterator,
    TypeInformation<X> type,
    String callLocationName) {
  final ParallelIteratorInputFormat<X> parallelFormat = new ParallelIteratorInputFormat<>(iterator);
  return new DataSource<>(this, parallelFormat, type, callLocationName);
}

/**
 * Creates a DataSet from the given iterator. Because the iterator will remain unmodified until
 * the actual execution happens, the type of data returned by the iterator must be given
 * explicitly in the form of the type information. This method is useful for cases where the
 * type is generic. In that case, the type class (as given in
 * {@link #fromCollection(Iterator, Class)}) does not supply all type information.
 *
 * <p>Note that this operation will result in a non-parallel data source, i.e. a data source
 * with a parallelism of one.
 *
 * @param data The iterator of elements to create the data set from.
 * @param type The TypeInformation for the produced data set.
 * @return A DataSet representing the elements in the iterator.
 *
 * @see #fromCollection(Iterator, Class)
 */
public <X> DataSource<X> fromCollection(Iterator<X> data, TypeInformation<X> type) {
  final IteratorInputFormat<X> iteratorFormat = new IteratorInputFormat<>(data);
  final String callSite = Utils.getCallLocationName();
  return new DataSource<>(this, iteratorFormat, type, callSite);
}

/**
 * Specifies the type of the single CSV field and parses the CSV data into a {@link Tuple1}.
 *
 * <p>This method is overloaded for each supported tuple length so that data sets created
 * through CSV parsing are type safe.
 *
 * @param type0 The type of CSV field 0 and the type of field 0 in the returned tuple type.
 * @return The {@link org.apache.flink.api.java.DataSet} representing the parsed CSV data.
 */
public <T0> DataSource<Tuple1<T0>> types(Class<T0> type0) {
  final TupleTypeInfo<Tuple1<T0>> tupleInfo =
      TupleTypeInfo.getBasicAndBasicValueTupleTypeInfo(type0);
  final CsvInputFormat<Tuple1<T0>> csvFormat =
      new TupleCsvInputFormat<>(path, tupleInfo, includedMask);
  configureInputFormat(csvFormat);

  // NOTE(review): the call location is resolved directly in this method; it is presumably
  // derived from the call stack, so confirm before moving this call into a helper.
  final String callSite = Utils.getCallLocationName();
  return new DataSource<>(executionContext, csvFormat, tupleInfo, callSite);
}

/**
 * Specifies the types of the three CSV fields and parses the CSV data into a {@link Tuple3}.
 *
 * <p>This method is overloaded for each supported tuple length so that data sets created
 * through CSV parsing are type safe.
 *
 * @param type0 The type of CSV field 0 and the type of field 0 in the returned tuple type.
 * @param type1 The type of CSV field 1 and the type of field 1 in the returned tuple type.
 * @param type2 The type of CSV field 2 and the type of field 2 in the returned tuple type.
 * @return The {@link org.apache.flink.api.java.DataSet} representing the parsed CSV data.
 */
public <T0, T1, T2> DataSource<Tuple3<T0, T1, T2>> types(
    Class<T0> type0,
    Class<T1> type1,
    Class<T2> type2) {
  final TupleTypeInfo<Tuple3<T0, T1, T2>> tupleInfo =
      TupleTypeInfo.getBasicAndBasicValueTupleTypeInfo(type0, type1, type2);
  final CsvInputFormat<Tuple3<T0, T1, T2>> csvFormat =
      new TupleCsvInputFormat<>(path, tupleInfo, includedMask);
  configureInputFormat(csvFormat);

  final String callSite = Utils.getCallLocationName();
  return new DataSource<>(executionContext, csvFormat, tupleInfo, callSite);
}

/**
 * Specifies the types of the two CSV fields and parses the CSV data into a {@link Tuple2}.
 *
 * <p>This method is overloaded for each supported tuple length so that data sets created
 * through CSV parsing are type safe.
 *
 * @param type0 The type of CSV field 0 and the type of field 0 in the returned tuple type.
 * @param type1 The type of CSV field 1 and the type of field 1 in the returned tuple type.
 * @return The {@link org.apache.flink.api.java.DataSet} representing the parsed CSV data.
 */
public <T0, T1> DataSource<Tuple2<T0, T1>> types(
    Class<T0> type0,
    Class<T1> type1) {
  final TupleTypeInfo<Tuple2<T0, T1>> tupleInfo =
      TupleTypeInfo.getBasicAndBasicValueTupleTypeInfo(type0, type1);
  final CsvInputFormat<Tuple2<T0, T1>> csvFormat =
      new TupleCsvInputFormat<>(path, tupleInfo, includedMask);
  configureInputFormat(csvFormat);

  final String callSite = Utils.getCallLocationName();
  return new DataSource<>(executionContext, csvFormat, tupleInfo, callSite);
}

/**
 * Specifies the types of the four CSV fields and parses the CSV data into a {@link Tuple4}.
 *
 * <p>This method is overloaded for each supported tuple length so that data sets created
 * through CSV parsing are type safe.
 *
 * @param type0 The type of CSV field 0 and the type of field 0 in the returned tuple type.
 * @param type1 The type of CSV field 1 and the type of field 1 in the returned tuple type.
 * @param type2 The type of CSV field 2 and the type of field 2 in the returned tuple type.
 * @param type3 The type of CSV field 3 and the type of field 3 in the returned tuple type.
 * @return The {@link org.apache.flink.api.java.DataSet} representing the parsed CSV data.
 */
public <T0, T1, T2, T3> DataSource<Tuple4<T0, T1, T2, T3>> types(
    Class<T0> type0,
    Class<T1> type1,
    Class<T2> type2,
    Class<T3> type3) {
  final TupleTypeInfo<Tuple4<T0, T1, T2, T3>> tupleInfo =
      TupleTypeInfo.getBasicAndBasicValueTupleTypeInfo(type0, type1, type2, type3);
  final CsvInputFormat<Tuple4<T0, T1, T2, T3>> csvFormat =
      new TupleCsvInputFormat<>(path, tupleInfo, includedMask);
  configureInputFormat(csvFormat);

  final String callSite = Utils.getCallLocationName();
  return new DataSource<>(executionContext, csvFormat, tupleInfo, callSite);
}

/**
 * Specifies the types of the five CSV fields and parses the CSV data into a {@link Tuple5}.
 *
 * <p>This method is overloaded for each supported tuple length so that data sets created
 * through CSV parsing are type safe.
 *
 * @param type0 The type of CSV field 0 and the type of field 0 in the returned tuple type.
 * @param type1 The type of CSV field 1 and the type of field 1 in the returned tuple type.
 * @param type2 The type of CSV field 2 and the type of field 2 in the returned tuple type.
 * @param type3 The type of CSV field 3 and the type of field 3 in the returned tuple type.
 * @param type4 The type of CSV field 4 and the type of field 4 in the returned tuple type.
 * @return The {@link org.apache.flink.api.java.DataSet} representing the parsed CSV data.
 */
public <T0, T1, T2, T3, T4> DataSource<Tuple5<T0, T1, T2, T3, T4>> types(
    Class<T0> type0,
    Class<T1> type1,
    Class<T2> type2,
    Class<T3> type3,
    Class<T4> type4) {
  final TupleTypeInfo<Tuple5<T0, T1, T2, T3, T4>> tupleInfo =
      TupleTypeInfo.getBasicAndBasicValueTupleTypeInfo(type0, type1, type2, type3, type4);
  final CsvInputFormat<Tuple5<T0, T1, T2, T3, T4>> csvFormat =
      new TupleCsvInputFormat<>(path, tupleInfo, includedMask);
  configureInputFormat(csvFormat);

  final String callSite = Utils.getCallLocationName();
  return new DataSource<>(executionContext, csvFormat, tupleInfo, callSite);
}

/**
 * Specifies the types of the six CSV fields and parses the CSV data into a {@link Tuple6}.
 *
 * <p>This method is overloaded for each supported tuple length so that data sets created
 * through CSV parsing are type safe.
 *
 * @param type0 The type of CSV field 0 and the type of field 0 in the returned tuple type.
 * @param type1 The type of CSV field 1 and the type of field 1 in the returned tuple type.
 * @param type2 The type of CSV field 2 and the type of field 2 in the returned tuple type.
 * @param type3 The type of CSV field 3 and the type of field 3 in the returned tuple type.
 * @param type4 The type of CSV field 4 and the type of field 4 in the returned tuple type.
 * @param type5 The type of CSV field 5 and the type of field 5 in the returned tuple type.
 * @return The {@link org.apache.flink.api.java.DataSet} representing the parsed CSV data.
 */
public <T0, T1, T2, T3, T4, T5> DataSource<Tuple6<T0, T1, T2, T3, T4, T5>> types(
    Class<T0> type0,
    Class<T1> type1,
    Class<T2> type2,
    Class<T3> type3,
    Class<T4> type4,
    Class<T5> type5) {
  final TupleTypeInfo<Tuple6<T0, T1, T2, T3, T4, T5>> tupleInfo =
      TupleTypeInfo.getBasicAndBasicValueTupleTypeInfo(type0, type1, type2, type3, type4, type5);
  final CsvInputFormat<Tuple6<T0, T1, T2, T3, T4, T5>> csvFormat =
      new TupleCsvInputFormat<>(path, tupleInfo, includedMask);
  configureInputFormat(csvFormat);

  final String callSite = Utils.getCallLocationName();
  return new DataSource<>(executionContext, csvFormat, tupleInfo, callSite);
}

/**
 * Builds a data source over the given collection after checking the collection against the
 * type class reported by the type information.
 *
 * @param data The collection of elements to create the data set from.
 * @param type The type information for the produced data set; also supplies the serializer.
 * @param callLocationName The call-location label handed to the created data source.
 * @return A data source over the collection's elements.
 */
private <X> DataSource<X> fromCollection(
    Collection<X> data,
    TypeInformation<X> type,
    String callLocationName) {
  // Validate first so that no input format is built for a rejected collection.
  CollectionInputFormat.checkCollection(data, type.getTypeClass());

  final CollectionInputFormat<X> collectionFormat =
      new CollectionInputFormat<>(data, type.createSerializer(config));
  return new DataSource<>(this, collectionFormat, type, callLocationName);
}

/**
 * Specifies the types of the seven CSV fields and parses the CSV data into a {@link Tuple7}.
 *
 * <p>This method is overloaded for each supported tuple length so that data sets created
 * through CSV parsing are type safe.
 *
 * @param type0 The type of CSV field 0 and the type of field 0 in the returned tuple type.
 * @param type1 The type of CSV field 1 and the type of field 1 in the returned tuple type.
 * @param type2 The type of CSV field 2 and the type of field 2 in the returned tuple type.
 * @param type3 The type of CSV field 3 and the type of field 3 in the returned tuple type.
 * @param type4 The type of CSV field 4 and the type of field 4 in the returned tuple type.
 * @param type5 The type of CSV field 5 and the type of field 5 in the returned tuple type.
 * @param type6 The type of CSV field 6 and the type of field 6 in the returned tuple type.
 * @return The {@link org.apache.flink.api.java.DataSet} representing the parsed CSV data.
 */
public <T0, T1, T2, T3, T4, T5, T6> DataSource<Tuple7<T0, T1, T2, T3, T4, T5, T6>> types(
    Class<T0> type0,
    Class<T1> type1,
    Class<T2> type2,
    Class<T3> type3,
    Class<T4> type4,
    Class<T5> type5,
    Class<T6> type6) {
  final TupleTypeInfo<Tuple7<T0, T1, T2, T3, T4, T5, T6>> tupleInfo =
      TupleTypeInfo.getBasicAndBasicValueTupleTypeInfo(
          type0, type1, type2, type3, type4, type5, type6);
  final CsvInputFormat<Tuple7<T0, T1, T2, T3, T4, T5, T6>> csvFormat =
      new TupleCsvInputFormat<>(path, tupleInfo, includedMask);
  configureInputFormat(csvFormat);

  final String callSite = Utils.getCallLocationName();
  return new DataSource<>(executionContext, csvFormat, tupleInfo, callSite);
}

/**
 * Specifies the types of the eight CSV fields and parses the CSV data into an 8-tuple
 * ({@link Tuple8}).
 *
 * <p>This method is overloaded for each supported tuple length so that data sets created
 * through CSV parsing are type safe.
 *
 * @param type0 The type of CSV field 0 and the type of field 0 in the returned tuple type.
 * @param type1 The type of CSV field 1 and the type of field 1 in the returned tuple type.
 * @param type2 The type of CSV field 2 and the type of field 2 in the returned tuple type.
 * @param type3 The type of CSV field 3 and the type of field 3 in the returned tuple type.
 * @param type4 The type of CSV field 4 and the type of field 4 in the returned tuple type.
 * @param type5 The type of CSV field 5 and the type of field 5 in the returned tuple type.
 * @param type6 The type of CSV field 6 and the type of field 6 in the returned tuple type.
 * @param type7 The type of CSV field 7 and the type of field 7 in the returned tuple type.
 * @return The {@link org.apache.flink.api.java.DataSet} representing the parsed CSV data.
 */
public <T0, T1, T2, T3, T4, T5, T6, T7> DataSource<Tuple8<T0, T1, T2, T3, T4, T5, T6, T7>> types(
    Class<T0> type0,
    Class<T1> type1,
    Class<T2> type2,
    Class<T3> type3,
    Class<T4> type4,
    Class<T5> type5,
    Class<T6> type6,
    Class<T7> type7) {
  final TupleTypeInfo<Tuple8<T0, T1, T2, T3, T4, T5, T6, T7>> tupleInfo =
      TupleTypeInfo.getBasicAndBasicValueTupleTypeInfo(
          type0, type1, type2, type3, type4, type5, type6, type7);
  final CsvInputFormat<Tuple8<T0, T1, T2, T3, T4, T5, T6, T7>> csvFormat =
      new TupleCsvInputFormat<>(path, tupleInfo, includedMask);
  configureInputFormat(csvFormat);

  final String callSite = Utils.getCallLocationName();
  return new DataSource<>(executionContext, csvFormat, tupleInfo, callSite);
}

/**
 * Specifies the types of the nine CSV fields and parses the CSV data into a {@link Tuple9}.
 *
 * <p>This method is overloaded for each supported tuple length so that data sets created
 * through CSV parsing are type safe.
 *
 * @param type0 The type of CSV field 0 and the type of field 0 in the returned tuple type.
 * @param type1 The type of CSV field 1 and the type of field 1 in the returned tuple type.
 * @param type2 The type of CSV field 2 and the type of field 2 in the returned tuple type.
 * @param type3 The type of CSV field 3 and the type of field 3 in the returned tuple type.
 * @param type4 The type of CSV field 4 and the type of field 4 in the returned tuple type.
 * @param type5 The type of CSV field 5 and the type of field 5 in the returned tuple type.
 * @param type6 The type of CSV field 6 and the type of field 6 in the returned tuple type.
 * @param type7 The type of CSV field 7 and the type of field 7 in the returned tuple type.
 * @param type8 The type of CSV field 8 and the type of field 8 in the returned tuple type.
 * @return The {@link org.apache.flink.api.java.DataSet} representing the parsed CSV data.
 */
public <T0, T1, T2, T3, T4, T5, T6, T7, T8> DataSource<Tuple9<T0, T1, T2, T3, T4, T5, T6, T7, T8>> types(
    Class<T0> type0,
    Class<T1> type1,
    Class<T2> type2,
    Class<T3> type3,
    Class<T4> type4,
    Class<T5> type5,
    Class<T6> type6,
    Class<T7> type7,
    Class<T8> type8) {
  final TupleTypeInfo<Tuple9<T0, T1, T2, T3, T4, T5, T6, T7, T8>> tupleInfo =
      TupleTypeInfo.getBasicAndBasicValueTupleTypeInfo(
          type0, type1, type2, type3, type4, type5, type6, type7, type8);
  final CsvInputFormat<Tuple9<T0, T1, T2, T3, T4, T5, T6, T7, T8>> csvFormat =
      new TupleCsvInputFormat<>(path, tupleInfo, includedMask);
  configureInputFormat(csvFormat);

  final String callSite = Utils.getCallLocationName();
  return new DataSource<>(executionContext, csvFormat, tupleInfo, callSite);
}

/**
 * Configures the reader to read the CSV data and parse it to the given type. The type must be
 * a subclass of {@link Tuple}. The type information for the fields is obtained from the type
 * class. The type consequently needs to specify all generic field types of the tuple.
 *
 * @param targetType The class of the target type, needs to be a subclass of Tuple. Must not
 *     be null (enforced through {@code Preconditions.checkNotNull}).
 * @return The DataSet representing the parsed CSV data.
 * @throws IllegalArgumentException If {@code targetType} is not assignable to {@link Tuple}.
 */
public <T extends Tuple> DataSource<T> tupleType(Class<T> targetType) {
  Preconditions.checkNotNull(targetType, "The target type class must not be null.");
  // The generic bound already restricts T, but raw or unchecked callers can bypass it,
  // so the runtime check is kept.
  if (!Tuple.class.isAssignableFrom(targetType)) {
    throw new IllegalArgumentException("The target type must be a subclass of " + Tuple.class.getName());
  }
  @SuppressWarnings("unchecked")
  TupleTypeInfo<T> typeInfo = (TupleTypeInfo<T>) TypeExtractor.createTypeInfo(targetType);
  CsvInputFormat<T> inputFormat = new TupleCsvInputFormat<T>(path, this.lineDelimiter, this.fieldDelimiter, typeInfo, this.includedMask);
  // The per-field Class<?> array that used to be built here was never read, so it is gone;
  // the input format takes its field types from typeInfo.
  configureInputFormat(inputFormat);
  return new DataSource<T>(executionContext, inputFormat, typeInfo, Utils.getCallLocationName());
}

/**
 * Configures the reader to read the CSV data and parse it to the given POJO type. All fields
 * of the type must either be public or be settable. The type information for the fields is
 * obtained from the type class.
 *
 * @param pojoType The class of the target POJO. Must not be null.
 * @param pojoFields The fields of the POJO which are mapped to CSV fields. Must not be null.
 * @return The DataSet representing the parsed CSV data.
 * @throws IllegalArgumentException If the type information extracted for {@code pojoType}
 *     is not a {@link PojoTypeInfo}.
 */
public <T> DataSource<T> pojoType(Class<T> pojoType, String... pojoFields) {
  Preconditions.checkNotNull(pojoType, "The POJO type class must not be null.");
  Preconditions.checkNotNull(pojoFields, "POJO fields must be specified (not null) if output type is a POJO.");

  final TypeInformation<T> extracted = TypeExtractor.createTypeInfo(pojoType);
  final boolean isPojo = extracted instanceof PojoTypeInfo;
  if (!isPojo) {
    throw new IllegalArgumentException(
      "The specified class is not a POJO. The type class must meet the POJO requirements. Found: " + extracted);
  }

  final PojoTypeInfo<T> pojoInfo = (PojoTypeInfo<T>) extracted;
  final CsvInputFormat<T> csvFormat =
      new PojoCsvInputFormat<>(path, lineDelimiter, fieldDelimiter, pojoInfo, pojoFields, includedMask);
  configureInputFormat(csvFormat);

  final String callSite = Utils.getCallLocationName();
  return new DataSource<>(executionContext, csvFormat, pojoInfo, callSite);
}

/**
 * Creates a {@link DataSet} of Strings by reading the given file line by line, decoding it
 * with the {@link java.nio.charset.Charset} of the given name.
 *
 * @param filePath The path of the file, as a URI (e.g., "file:///some/local/file" or "hdfs://host:port/file/path").
 * @param charsetName The name of the character set used to read the file.
 * @return A {@link DataSet} that represents the data read from the given file as text lines.
 */
public DataSource<String> readTextFile(String filePath, String charsetName) {
  Preconditions.checkNotNull(filePath, "The file path may not be null.");

  final Path sourcePath = new Path(filePath);
  final TextInputFormat textFormat = new TextInputFormat(sourcePath);
  textFormat.setCharsetName(charsetName);

  final String callSite = Utils.getCallLocationName();
  return new DataSource<>(this, textFormat, BasicTypeInfo.STRING_TYPE_INFO, callSite);
}

/**
 * Creates a {@link DataSet} of Strings by reading the given file line by line.
 * The file will be read with the UTF-8 character set.
 *
 * @param filePath The path of the file, as a URI (e.g., "file:///some/local/file" or "hdfs://host:port/file/path").
 * @return A {@link DataSet} that represents the data read from the given file as text lines.
 */
public DataSource<String> readTextFile(String filePath) {
  Preconditions.checkNotNull(filePath, "The file path may not be null.");

  final TextInputFormat textFormat = new TextInputFormat(new Path(filePath));

  // NOTE(review): the call location is resolved directly in this method; it is presumably
  // derived from the call stack, so confirm before moving this call into a helper.
  final String callSite = Utils.getCallLocationName();
  return new DataSource<>(this, textFormat, BasicTypeInfo.STRING_TYPE_INFO, callSite);
}

/**
 * Creates a {@link DataSet} of primitive-type values by reading the given file line by line.
 * This method is similar to {@link #readCsvFile(String)} with a single field, but the
 * resulting DataSet is not wrapped in {@link org.apache.flink.api.java.tuple.Tuple1}.
 *
 * @param filePath The path of the file, as a URI (e.g., "file:///some/local/file" or "hdfs://host:port/file/path").
 * @param typeClass The primitive type class to be read.
 * @return A {@link DataSet} that represents the data read from the given file as primitive type.
 */
public <X> DataSource<X> readFileOfPrimitives(String filePath, Class<X> typeClass) {
  Preconditions.checkNotNull(filePath, "The file path may not be null.");

  final PrimitiveInputFormat<X> primitiveFormat =
      new PrimitiveInputFormat<>(new Path(filePath), typeClass);
  final TypeInformation<X> elementType = TypeExtractor.getForClass(typeClass);

  final String callSite = Utils.getCallLocationName();
  return new DataSource<>(this, primitiveFormat, elementType, callSite);
}

/**
 * Creates a {@link DataSet} of primitive-type values by reading the given file, splitting it
 * at the given delimiter. This method is similar to {@link #readCsvFile(String)} with a
 * single field, but the resulting DataSet is not wrapped in
 * {@link org.apache.flink.api.java.tuple.Tuple1}.
 *
 * @param filePath The path of the file, as a URI (e.g., "file:///some/local/file" or "hdfs://host:port/file/path").
 * @param delimiter The delimiter of the given file.
 * @param typeClass The primitive type class to be read.
 * @return A {@link DataSet} that represents the data read from the given file as primitive type.
 */
public <X> DataSource<X> readFileOfPrimitives(String filePath, String delimiter, Class<X> typeClass) {
  Preconditions.checkNotNull(filePath, "The file path may not be null.");

  final PrimitiveInputFormat<X> primitiveFormat =
      new PrimitiveInputFormat<>(new Path(filePath), delimiter, typeClass);
  final TypeInformation<X> elementType = TypeExtractor.getForClass(typeClass);

  final String callSite = Utils.getCallLocationName();
  return new DataSource<>(this, primitiveFormat, elementType, callSite);
}

/**
 * Creates a {@link DataSet} of text lines by reading the given file line by line.
 * This method is similar to {@link #readTextFile(String)}, but it produces a DataSet with
 * mutable {@link StringValue} objects rather than Java Strings. StringValues can be used to
 * tune implementations to be less object and garbage collection heavy.
 *
 * <p>The file will be read with the UTF-8 character set.
 *
 * @param filePath The path of the file, as a URI (e.g., "file:///some/local/file" or "hdfs://host:port/file/path").
 * @return A {@link DataSet} that represents the data read from the given file as text lines.
 */
public DataSource<StringValue> readTextFileWithValue(String filePath) {
  Preconditions.checkNotNull(filePath, "The file path may not be null.");

  final TextValueInputFormat valueFormat = new TextValueInputFormat(new Path(filePath));
  final ValueTypeInfo<StringValue> valueType = new ValueTypeInfo<>(StringValue.class);

  final String callSite = Utils.getCallLocationName();
  return new DataSource<>(this, valueFormat, valueType, callSite);
}

/**
 * Creates a {@link DataSet} of text lines by reading the given file line by line.
 * This method is similar to {@link #readTextFile(String, String)}, but it produces a DataSet
 * with mutable {@link StringValue} objects rather than Java Strings. StringValues can be used
 * to tune implementations to be less object and garbage collection heavy.
 *
 * <p>The {@link java.nio.charset.Charset} with the given name will be used to read the files.
 *
 * @param filePath The path of the file, as a URI (e.g., "file:///some/local/file" or "hdfs://host:port/file/path").
 * @param charsetName The name of the character set used to read the file.
 * @param skipInvalidLines A flag to indicate whether to skip lines that cannot be read with the given character set.
 *
 * @return A DataSet that represents the data read from the given file as text lines.
 */
public DataSource<StringValue> readTextFileWithValue(String filePath, String charsetName, boolean skipInvalidLines) {
  Preconditions.checkNotNull(filePath, "The file path may not be null.");

  final TextValueInputFormat valueFormat = new TextValueInputFormat(new Path(filePath));
  valueFormat.setCharsetName(charsetName);
  valueFormat.setSkipInvalidLines(skipInvalidLines);

  final ValueTypeInfo<StringValue> valueType = new ValueTypeInfo<>(StringValue.class);
  final String callSite = Utils.getCallLocationName();
  return new DataSource<>(this, valueFormat, valueType, callSite);
}

How to use the org.apache.flink.api.java.operators.DataSource constructor

Best Java code snippets using org.apache.flink.api.java.operators.DataSource.<init> (Showing top 20 results out of 315)

How to use
org.apache.flink.api.java.operators.DataSource
constructor