eu.stratosphere.api.java.operators.DataSource.<init> java code examples

/**
 * Generic method to create an input DataSet from an {@link InputFormat}, using explicitly
 * supplied type information. The DataSet is not created immediately; this method returns a
 * DataSet that is lazily created from the input format once the program is executed.
 * <p>
 * The data set is typed to the given TypeInformation. This method is intended for input formats
 * whose return type cannot be determined by reflection analysis and that do not implement the
 * {@link ResultTypeQueryable} interface.
 * 
 * @param inputFormat The input format used to create the data set.
 * @param producedType The type information describing the elements produced by the input format.
 * @return A DataSet that represents the data created by the input format.
 * @throws IllegalArgumentException If {@code inputFormat} or {@code producedType} is {@code null}.
 * 
 * @see #createInput(InputFormat)
 */
public <X> DataSource<X> createInput(InputFormat<X, ?> inputFormat, TypeInformation<X> producedType) {
  // Reject missing arguments up front; the input format is checked before the type information.
  if (null == inputFormat) {
    throw new IllegalArgumentException("InputFormat must not be null.");
  }
  if (null == producedType) {
    throw new IllegalArgumentException("Produced type information must not be null.");
  }

  final DataSource<X> source = new DataSource<X>(this, inputFormat, producedType);
  return source;
}

/**
 * Creates a new data set that contains the elements in the given iterator. The iterator is
 * splittable, allowing the framework to create a parallel data source that returns the elements
 * in the iterator. The iterator must be serializable (as defined in {@link java.io.Serializable}),
 * because the execution environment may ship the elements into the cluster.
 * <p>
 * Because the iterator will remain unmodified until the actual execution happens, the type of
 * data returned by the iterator must be given explicitly in the form of the type information.
 * This method is useful for cases where the type is generic. In that case, the type class
 * (as given in {@link #fromParallelCollection(SplittableIterator, Class)}) does not supply all
 * type information.
 * 
 * @param iterator The iterator that produces the elements of the data set.
 * @param type The TypeInformation for the produced data set.
 * @return A DataSet representing the elements in the iterator.
 * 
 * @see #fromParallelCollection(SplittableIterator, Class)
 */
public <X> DataSource<X> fromParallelCollection(SplittableIterator<X> iterator, TypeInformation<X> type) {
  // Wrap the splittable iterator in its input format, then hand it to the data source.
  final ParallelIteratorInputFormat<X> parallelFormat = new ParallelIteratorInputFormat<X>(iterator);
  return new DataSource<X>(this, parallelFormat, type);
}

/**
 * Creates a DataSet of Strings obtained by reading the given file line by line, decoding it
 * with the {@link java.nio.charset.Charset} of the given name.
 * 
 * @param filePath The path of the file, as a URI (e.g., "file:///some/local/file" or "hdfs://host:port/file/path").
 * @param charsetName The name of the character set used to read the file.
 * @return A DataSet that represents the data read from the given file as text lines.
 */
public DataSource<String> readTextFile(String filePath, String charsetName) {
  Validate.notNull(filePath, "The file path may not be null.");

  final Path inputPath = new Path(filePath);
  final TextInputFormat textFormat = new TextInputFormat(inputPath);
  textFormat.setCharsetName(charsetName);

  return new DataSource<String>(this, textFormat, BasicTypeInfo.STRING_TYPE_INFO);
}

/**
 * Declares the type of the single CSV field and builds a data source whose records are
 * 1-tuples with a field of that type.
 * <p>
 * An overload of this method exists for each supported tuple length, so that data sets created
 * through CSV parsing are type safe.
 *
 * @param type0 The type of CSV field 0 and the type of field 0 in the returned tuple type.
 * @return The {@link eu.stratosphere.api.java.DataSet} representing the parsed CSV data.
 */
public <T0> DataSource<Tuple1<T0>> types(Class<T0> type0) {
  final TupleTypeInfo<Tuple1<T0>> tupleType = TupleTypeInfo.getBasicTupleTypeInfo(type0);

  final CsvInputFormat<Tuple1<T0>> csvFormat = new CsvInputFormat<Tuple1<T0>>(path);
  configureInputFormat(csvFormat, type0);

  return new DataSource<Tuple1<T0>>(executionContext, csvFormat, tupleType);
}

/**
 * Declares the types of the two CSV fields and builds a data source whose records are
 * 2-tuples with fields of those types.
 * <p>
 * An overload of this method exists for each supported tuple length, so that data sets created
 * through CSV parsing are type safe.
 *
 * @param type0 The type of CSV field 0 and the type of field 0 in the returned tuple type.
 * @param type1 The type of CSV field 1 and the type of field 1 in the returned tuple type.
 * @return The {@link eu.stratosphere.api.java.DataSet} representing the parsed CSV data.
 */
public <T0, T1> DataSource<Tuple2<T0, T1>> types(Class<T0> type0, Class<T1> type1) {
  final TupleTypeInfo<Tuple2<T0, T1>> tupleType = TupleTypeInfo.getBasicTupleTypeInfo(type0, type1);

  final CsvInputFormat<Tuple2<T0, T1>> csvFormat = new CsvInputFormat<Tuple2<T0, T1>>(path);
  configureInputFormat(csvFormat, type0, type1);

  return new DataSource<Tuple2<T0, T1>>(executionContext, csvFormat, tupleType);
}

/**
 * Declares the types of the five CSV fields and builds a data source whose records are
 * 5-tuples with fields of those types.
 * <p>
 * An overload of this method exists for each supported tuple length, so that data sets created
 * through CSV parsing are type safe.
 *
 * @param type0 The type of CSV field 0 and the type of field 0 in the returned tuple type.
 * @param type1 The type of CSV field 1 and the type of field 1 in the returned tuple type.
 * @param type2 The type of CSV field 2 and the type of field 2 in the returned tuple type.
 * @param type3 The type of CSV field 3 and the type of field 3 in the returned tuple type.
 * @param type4 The type of CSV field 4 and the type of field 4 in the returned tuple type.
 * @return The {@link eu.stratosphere.api.java.DataSet} representing the parsed CSV data.
 */
public <T0, T1, T2, T3, T4> DataSource<Tuple5<T0, T1, T2, T3, T4>> types(
    Class<T0> type0, Class<T1> type1, Class<T2> type2, Class<T3> type3, Class<T4> type4) {
  final TupleTypeInfo<Tuple5<T0, T1, T2, T3, T4>> tupleType =
      TupleTypeInfo.getBasicTupleTypeInfo(type0, type1, type2, type3, type4);

  final CsvInputFormat<Tuple5<T0, T1, T2, T3, T4>> csvFormat =
      new CsvInputFormat<Tuple5<T0, T1, T2, T3, T4>>(path);
  configureInputFormat(csvFormat, type0, type1, type2, type3, type4);

  return new DataSource<Tuple5<T0, T1, T2, T3, T4>>(executionContext, csvFormat, tupleType);
}

/**
 * Declares the types of the six CSV fields and builds a data source whose records are
 * 6-tuples with fields of those types.
 * <p>
 * An overload of this method exists for each supported tuple length, so that data sets created
 * through CSV parsing are type safe.
 *
 * @param type0 The type of CSV field 0 and the type of field 0 in the returned tuple type.
 * @param type1 The type of CSV field 1 and the type of field 1 in the returned tuple type.
 * @param type2 The type of CSV field 2 and the type of field 2 in the returned tuple type.
 * @param type3 The type of CSV field 3 and the type of field 3 in the returned tuple type.
 * @param type4 The type of CSV field 4 and the type of field 4 in the returned tuple type.
 * @param type5 The type of CSV field 5 and the type of field 5 in the returned tuple type.
 * @return The {@link eu.stratosphere.api.java.DataSet} representing the parsed CSV data.
 */
public <T0, T1, T2, T3, T4, T5> DataSource<Tuple6<T0, T1, T2, T3, T4, T5>> types(
    Class<T0> type0, Class<T1> type1, Class<T2> type2, Class<T3> type3, Class<T4> type4,
    Class<T5> type5) {
  final TupleTypeInfo<Tuple6<T0, T1, T2, T3, T4, T5>> tupleType =
      TupleTypeInfo.getBasicTupleTypeInfo(type0, type1, type2, type3, type4, type5);

  final CsvInputFormat<Tuple6<T0, T1, T2, T3, T4, T5>> csvFormat =
      new CsvInputFormat<Tuple6<T0, T1, T2, T3, T4, T5>>(path);
  configureInputFormat(csvFormat, type0, type1, type2, type3, type4, type5);

  return new DataSource<Tuple6<T0, T1, T2, T3, T4, T5>>(executionContext, csvFormat, tupleType);
}

/**
 * Creates a DataSet of Strings obtained by reading the given file line by line.
 * The file will be read with the system's default character set.
 * 
 * @param filePath The path of the file, as a URI (e.g., "file:///some/local/file" or "hdfs://host:port/file/path").
 * @return A DataSet that represents the data read from the given file as text lines.
 */
public DataSource<String> readTextFile(String filePath) {
  Validate.notNull(filePath, "The file path may not be null.");

  final Path inputPath = new Path(filePath);
  final TextInputFormat textFormat = new TextInputFormat(inputPath);
  return new DataSource<String>(this, textFormat, BasicTypeInfo.STRING_TYPE_INFO);
}

/**
 * Declares the types of the three CSV fields and builds a data source whose records are
 * 3-tuples with fields of those types.
 * <p>
 * An overload of this method exists for each supported tuple length, so that data sets created
 * through CSV parsing are type safe.
 *
 * @param type0 The type of CSV field 0 and the type of field 0 in the returned tuple type.
 * @param type1 The type of CSV field 1 and the type of field 1 in the returned tuple type.
 * @param type2 The type of CSV field 2 and the type of field 2 in the returned tuple type.
 * @return The {@link eu.stratosphere.api.java.DataSet} representing the parsed CSV data.
 */
public <T0, T1, T2> DataSource<Tuple3<T0, T1, T2>> types(
    Class<T0> type0, Class<T1> type1, Class<T2> type2) {
  final TupleTypeInfo<Tuple3<T0, T1, T2>> tupleType =
      TupleTypeInfo.getBasicTupleTypeInfo(type0, type1, type2);

  final CsvInputFormat<Tuple3<T0, T1, T2>> csvFormat =
      new CsvInputFormat<Tuple3<T0, T1, T2>>(path);
  configureInputFormat(csvFormat, type0, type1, type2);

  return new DataSource<Tuple3<T0, T1, T2>>(executionContext, csvFormat, tupleType);
}

/**
 * Declares the types of the four CSV fields and builds a data source whose records are
 * 4-tuples with fields of those types.
 * <p>
 * An overload of this method exists for each supported tuple length, so that data sets created
 * through CSV parsing are type safe.
 *
 * @param type0 The type of CSV field 0 and the type of field 0 in the returned tuple type.
 * @param type1 The type of CSV field 1 and the type of field 1 in the returned tuple type.
 * @param type2 The type of CSV field 2 and the type of field 2 in the returned tuple type.
 * @param type3 The type of CSV field 3 and the type of field 3 in the returned tuple type.
 * @return The {@link eu.stratosphere.api.java.DataSet} representing the parsed CSV data.
 */
public <T0, T1, T2, T3> DataSource<Tuple4<T0, T1, T2, T3>> types(
    Class<T0> type0, Class<T1> type1, Class<T2> type2, Class<T3> type3) {
  final TupleTypeInfo<Tuple4<T0, T1, T2, T3>> tupleType =
      TupleTypeInfo.getBasicTupleTypeInfo(type0, type1, type2, type3);

  final CsvInputFormat<Tuple4<T0, T1, T2, T3>> csvFormat =
      new CsvInputFormat<Tuple4<T0, T1, T2, T3>>(path);
  configureInputFormat(csvFormat, type0, type1, type2, type3);

  return new DataSource<Tuple4<T0, T1, T2, T3>>(executionContext, csvFormat, tupleType);
}

/**
 * Declares the types of the seven CSV fields and builds a data source whose records are
 * 7-tuples with fields of those types.
 * <p>
 * An overload of this method exists for each supported tuple length, so that data sets created
 * through CSV parsing are type safe.
 *
 * @param type0 The type of CSV field 0 and the type of field 0 in the returned tuple type.
 * @param type1 The type of CSV field 1 and the type of field 1 in the returned tuple type.
 * @param type2 The type of CSV field 2 and the type of field 2 in the returned tuple type.
 * @param type3 The type of CSV field 3 and the type of field 3 in the returned tuple type.
 * @param type4 The type of CSV field 4 and the type of field 4 in the returned tuple type.
 * @param type5 The type of CSV field 5 and the type of field 5 in the returned tuple type.
 * @param type6 The type of CSV field 6 and the type of field 6 in the returned tuple type.
 * @return The {@link eu.stratosphere.api.java.DataSet} representing the parsed CSV data.
 */
public <T0, T1, T2, T3, T4, T5, T6> DataSource<Tuple7<T0, T1, T2, T3, T4, T5, T6>> types(
    Class<T0> type0, Class<T1> type1, Class<T2> type2, Class<T3> type3, Class<T4> type4,
    Class<T5> type5, Class<T6> type6) {
  final TupleTypeInfo<Tuple7<T0, T1, T2, T3, T4, T5, T6>> tupleType =
      TupleTypeInfo.getBasicTupleTypeInfo(type0, type1, type2, type3, type4, type5, type6);

  final CsvInputFormat<Tuple7<T0, T1, T2, T3, T4, T5, T6>> csvFormat =
      new CsvInputFormat<Tuple7<T0, T1, T2, T3, T4, T5, T6>>(path);
  configureInputFormat(csvFormat, type0, type1, type2, type3, type4, type5, type6);

  return new DataSource<Tuple7<T0, T1, T2, T3, T4, T5, T6>>(executionContext, csvFormat, tupleType);
}

/**
 * Declares the types of the eight CSV fields and builds a data source whose records are
 * 8-tuples with fields of those types.
 * <p>
 * An overload of this method exists for each supported tuple length, so that data sets created
 * through CSV parsing are type safe.
 *
 * @param type0 The type of CSV field 0 and the type of field 0 in the returned tuple type.
 * @param type1 The type of CSV field 1 and the type of field 1 in the returned tuple type.
 * @param type2 The type of CSV field 2 and the type of field 2 in the returned tuple type.
 * @param type3 The type of CSV field 3 and the type of field 3 in the returned tuple type.
 * @param type4 The type of CSV field 4 and the type of field 4 in the returned tuple type.
 * @param type5 The type of CSV field 5 and the type of field 5 in the returned tuple type.
 * @param type6 The type of CSV field 6 and the type of field 6 in the returned tuple type.
 * @param type7 The type of CSV field 7 and the type of field 7 in the returned tuple type.
 * @return The {@link eu.stratosphere.api.java.DataSet} representing the parsed CSV data.
 */
public <T0, T1, T2, T3, T4, T5, T6, T7> DataSource<Tuple8<T0, T1, T2, T3, T4, T5, T6, T7>> types(
    Class<T0> type0, Class<T1> type1, Class<T2> type2, Class<T3> type3, Class<T4> type4,
    Class<T5> type5, Class<T6> type6, Class<T7> type7) {
  final TupleTypeInfo<Tuple8<T0, T1, T2, T3, T4, T5, T6, T7>> tupleType =
      TupleTypeInfo.getBasicTupleTypeInfo(type0, type1, type2, type3, type4, type5, type6, type7);

  final CsvInputFormat<Tuple8<T0, T1, T2, T3, T4, T5, T6, T7>> csvFormat =
      new CsvInputFormat<Tuple8<T0, T1, T2, T3, T4, T5, T6, T7>>(path);
  configureInputFormat(csvFormat, type0, type1, type2, type3, type4, type5, type6, type7);

  return new DataSource<Tuple8<T0, T1, T2, T3, T4, T5, T6, T7>>(executionContext, csvFormat, tupleType);
}

/**
 * Declares the types of the ten CSV fields and builds a data source whose records are
 * 10-tuples with fields of those types.
 * <p>
 * An overload of this method exists for each supported tuple length, so that data sets created
 * through CSV parsing are type safe.
 *
 * @param type0 The type of CSV field 0 and the type of field 0 in the returned tuple type.
 * @param type1 The type of CSV field 1 and the type of field 1 in the returned tuple type.
 * @param type2 The type of CSV field 2 and the type of field 2 in the returned tuple type.
 * @param type3 The type of CSV field 3 and the type of field 3 in the returned tuple type.
 * @param type4 The type of CSV field 4 and the type of field 4 in the returned tuple type.
 * @param type5 The type of CSV field 5 and the type of field 5 in the returned tuple type.
 * @param type6 The type of CSV field 6 and the type of field 6 in the returned tuple type.
 * @param type7 The type of CSV field 7 and the type of field 7 in the returned tuple type.
 * @param type8 The type of CSV field 8 and the type of field 8 in the returned tuple type.
 * @param type9 The type of CSV field 9 and the type of field 9 in the returned tuple type.
 * @return The {@link eu.stratosphere.api.java.DataSet} representing the parsed CSV data.
 */
public <T0, T1, T2, T3, T4, T5, T6, T7, T8, T9> DataSource<Tuple10<T0, T1, T2, T3, T4, T5, T6, T7, T8, T9>> types(
    Class<T0> type0, Class<T1> type1, Class<T2> type2, Class<T3> type3, Class<T4> type4,
    Class<T5> type5, Class<T6> type6, Class<T7> type7, Class<T8> type8, Class<T9> type9) {
  final TupleTypeInfo<Tuple10<T0, T1, T2, T3, T4, T5, T6, T7, T8, T9>> tupleType =
      TupleTypeInfo.getBasicTupleTypeInfo(type0, type1, type2, type3, type4, type5, type6, type7, type8, type9);

  final CsvInputFormat<Tuple10<T0, T1, T2, T3, T4, T5, T6, T7, T8, T9>> csvFormat =
      new CsvInputFormat<Tuple10<T0, T1, T2, T3, T4, T5, T6, T7, T8, T9>>(path);
  configureInputFormat(csvFormat, type0, type1, type2, type3, type4, type5, type6, type7, type8, type9);

  return new DataSource<Tuple10<T0, T1, T2, T3, T4, T5, T6, T7, T8, T9>>(executionContext, csvFormat, tupleType);
}

/**
 * Declares the types of the eleven CSV fields and builds a data source whose records are
 * 11-tuples with fields of those types.
 * <p>
 * An overload of this method exists for each supported tuple length, so that data sets created
 * through CSV parsing are type safe.
 *
 * @param type0 The type of CSV field 0 and the type of field 0 in the returned tuple type.
 * @param type1 The type of CSV field 1 and the type of field 1 in the returned tuple type.
 * @param type2 The type of CSV field 2 and the type of field 2 in the returned tuple type.
 * @param type3 The type of CSV field 3 and the type of field 3 in the returned tuple type.
 * @param type4 The type of CSV field 4 and the type of field 4 in the returned tuple type.
 * @param type5 The type of CSV field 5 and the type of field 5 in the returned tuple type.
 * @param type6 The type of CSV field 6 and the type of field 6 in the returned tuple type.
 * @param type7 The type of CSV field 7 and the type of field 7 in the returned tuple type.
 * @param type8 The type of CSV field 8 and the type of field 8 in the returned tuple type.
 * @param type9 The type of CSV field 9 and the type of field 9 in the returned tuple type.
 * @param type10 The type of CSV field 10 and the type of field 10 in the returned tuple type.
 * @return The {@link eu.stratosphere.api.java.DataSet} representing the parsed CSV data.
 */
public <T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10> DataSource<Tuple11<T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10>> types(
    Class<T0> type0, Class<T1> type1, Class<T2> type2, Class<T3> type3, Class<T4> type4,
    Class<T5> type5, Class<T6> type6, Class<T7> type7, Class<T8> type8, Class<T9> type9,
    Class<T10> type10) {
  final TupleTypeInfo<Tuple11<T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10>> tupleType =
      TupleTypeInfo.getBasicTupleTypeInfo(type0, type1, type2, type3, type4, type5, type6, type7, type8, type9, type10);

  final CsvInputFormat<Tuple11<T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10>> csvFormat =
      new CsvInputFormat<Tuple11<T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10>>(path);
  configureInputFormat(csvFormat, type0, type1, type2, type3, type4, type5, type6, type7, type8, type9, type10);

  return new DataSource<Tuple11<T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10>>(executionContext, csvFormat, tupleType);
}

/**
 * Declares the types of the twelve CSV fields and builds a data source whose records are
 * 12-tuples with fields of those types.
 * <p>
 * An overload of this method exists for each supported tuple length, so that data sets created
 * through CSV parsing are type safe.
 *
 * @param type0 The type of CSV field 0 and the type of field 0 in the returned tuple type.
 * @param type1 The type of CSV field 1 and the type of field 1 in the returned tuple type.
 * @param type2 The type of CSV field 2 and the type of field 2 in the returned tuple type.
 * @param type3 The type of CSV field 3 and the type of field 3 in the returned tuple type.
 * @param type4 The type of CSV field 4 and the type of field 4 in the returned tuple type.
 * @param type5 The type of CSV field 5 and the type of field 5 in the returned tuple type.
 * @param type6 The type of CSV field 6 and the type of field 6 in the returned tuple type.
 * @param type7 The type of CSV field 7 and the type of field 7 in the returned tuple type.
 * @param type8 The type of CSV field 8 and the type of field 8 in the returned tuple type.
 * @param type9 The type of CSV field 9 and the type of field 9 in the returned tuple type.
 * @param type10 The type of CSV field 10 and the type of field 10 in the returned tuple type.
 * @param type11 The type of CSV field 11 and the type of field 11 in the returned tuple type.
 * @return The {@link eu.stratosphere.api.java.DataSet} representing the parsed CSV data.
 */
public <T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11> DataSource<Tuple12<T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11>> types(
    Class<T0> type0, Class<T1> type1, Class<T2> type2, Class<T3> type3, Class<T4> type4,
    Class<T5> type5, Class<T6> type6, Class<T7> type7, Class<T8> type8, Class<T9> type9,
    Class<T10> type10, Class<T11> type11) {
  final TupleTypeInfo<Tuple12<T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11>> tupleType =
      TupleTypeInfo.getBasicTupleTypeInfo(type0, type1, type2, type3, type4, type5, type6, type7, type8, type9, type10, type11);

  final CsvInputFormat<Tuple12<T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11>> csvFormat =
      new CsvInputFormat<Tuple12<T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11>>(path);
  configureInputFormat(csvFormat, type0, type1, type2, type3, type4, type5, type6, type7, type8, type9, type10, type11);

  return new DataSource<Tuple12<T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11>>(executionContext, csvFormat, tupleType);
}

/**
 * Declares the types of the thirteen CSV fields and builds a data source whose records are
 * 13-tuples with fields of those types.
 * <p>
 * An overload of this method exists for each supported tuple length, so that data sets created
 * through CSV parsing are type safe.
 *
 * @param type0 The type of CSV field 0 and the type of field 0 in the returned tuple type.
 * @param type1 The type of CSV field 1 and the type of field 1 in the returned tuple type.
 * @param type2 The type of CSV field 2 and the type of field 2 in the returned tuple type.
 * @param type3 The type of CSV field 3 and the type of field 3 in the returned tuple type.
 * @param type4 The type of CSV field 4 and the type of field 4 in the returned tuple type.
 * @param type5 The type of CSV field 5 and the type of field 5 in the returned tuple type.
 * @param type6 The type of CSV field 6 and the type of field 6 in the returned tuple type.
 * @param type7 The type of CSV field 7 and the type of field 7 in the returned tuple type.
 * @param type8 The type of CSV field 8 and the type of field 8 in the returned tuple type.
 * @param type9 The type of CSV field 9 and the type of field 9 in the returned tuple type.
 * @param type10 The type of CSV field 10 and the type of field 10 in the returned tuple type.
 * @param type11 The type of CSV field 11 and the type of field 11 in the returned tuple type.
 * @param type12 The type of CSV field 12 and the type of field 12 in the returned tuple type.
 * @return The {@link eu.stratosphere.api.java.DataSet} representing the parsed CSV data.
 */
public <T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12> DataSource<Tuple13<T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12>> types(
    Class<T0> type0, Class<T1> type1, Class<T2> type2, Class<T3> type3, Class<T4> type4,
    Class<T5> type5, Class<T6> type6, Class<T7> type7, Class<T8> type8, Class<T9> type9,
    Class<T10> type10, Class<T11> type11, Class<T12> type12) {
  final TupleTypeInfo<Tuple13<T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12>> tupleType =
      TupleTypeInfo.getBasicTupleTypeInfo(type0, type1, type2, type3, type4, type5, type6, type7, type8, type9, type10, type11, type12);

  final CsvInputFormat<Tuple13<T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12>> csvFormat =
      new CsvInputFormat<Tuple13<T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12>>(path);
  configureInputFormat(csvFormat, type0, type1, type2, type3, type4, type5, type6, type7, type8, type9, type10, type11, type12);

  return new DataSource<Tuple13<T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12>>(executionContext, csvFormat, tupleType);
}

/**
 * Declares the types of the nine CSV fields and builds a data source whose records are
 * 9-tuples with fields of those types.
 * <p>
 * An overload of this method exists for each supported tuple length, so that data sets created
 * through CSV parsing are type safe.
 *
 * @param type0 The type of CSV field 0 and the type of field 0 in the returned tuple type.
 * @param type1 The type of CSV field 1 and the type of field 1 in the returned tuple type.
 * @param type2 The type of CSV field 2 and the type of field 2 in the returned tuple type.
 * @param type3 The type of CSV field 3 and the type of field 3 in the returned tuple type.
 * @param type4 The type of CSV field 4 and the type of field 4 in the returned tuple type.
 * @param type5 The type of CSV field 5 and the type of field 5 in the returned tuple type.
 * @param type6 The type of CSV field 6 and the type of field 6 in the returned tuple type.
 * @param type7 The type of CSV field 7 and the type of field 7 in the returned tuple type.
 * @param type8 The type of CSV field 8 and the type of field 8 in the returned tuple type.
 * @return The {@link eu.stratosphere.api.java.DataSet} representing the parsed CSV data.
 */
public <T0, T1, T2, T3, T4, T5, T6, T7, T8> DataSource<Tuple9<T0, T1, T2, T3, T4, T5, T6, T7, T8>> types(
    Class<T0> type0, Class<T1> type1, Class<T2> type2, Class<T3> type3, Class<T4> type4,
    Class<T5> type5, Class<T6> type6, Class<T7> type7, Class<T8> type8) {
  final TupleTypeInfo<Tuple9<T0, T1, T2, T3, T4, T5, T6, T7, T8>> tupleType =
      TupleTypeInfo.getBasicTupleTypeInfo(type0, type1, type2, type3, type4, type5, type6, type7, type8);

  final CsvInputFormat<Tuple9<T0, T1, T2, T3, T4, T5, T6, T7, T8>> csvFormat =
      new CsvInputFormat<Tuple9<T0, T1, T2, T3, T4, T5, T6, T7, T8>>(path);
  configureInputFormat(csvFormat, type0, type1, type2, type3, type4, type5, type6, type7, type8);

  return new DataSource<Tuple9<T0, T1, T2, T3, T4, T5, T6, T7, T8>>(executionContext, csvFormat, tupleType);
}

/**
 * Creates a DataSet from the given non-empty collection. The type of the data set is that
 * of the elements in the collection. The elements need to be serializable (as defined by
 * {@link java.io.Serializable}), because the framework may move the elements into the cluster
 * if needed.
 * <p>
 * Note that this operation will result in a non-parallel data source, i.e. a data source with
 * a degree of parallelism of one.
 * <p>
 * The returned DataSet is typed to the given TypeInformation. Before the data source is built,
 * the collection is passed to {@code CollectionInputFormat.checkCollection} together with the
 * type class reported by {@code type}.
 *  
 * @param data The collection of elements to create the data set from.
 * @param type The TypeInformation for the produced data set.
 * @return A DataSet representing the given collection.
 * 
 * @see #fromCollection(Collection)
 */
public <X> DataSource<X> fromCollection(Collection<X> data, TypeInformation<X> type) {
  // Check the collection against the declared element class before wrapping it.
  CollectionInputFormat.checkCollection(data, type.getTypeClass());

  final CollectionInputFormat<X> collectionFormat = new CollectionInputFormat<X>(data);
  return new DataSource<X>(this, collectionFormat, type);
}

/**
 * Creates a DataSet that represents the text lines produced by reading the given file line by line.
 * This method is similar to {@link #readTextFile(String)}, but it produces a DataSet with mutable
 * {@link StringValue} objects, rather than Java Strings. StringValues can be used to tune
 * implementations to be less object and garbage collection heavy.
 * <p>
 * The file will be read with the system's default character set.
 * 
 * @param filePath The path of the file, as a URI (e.g., "file:///some/local/file" or "hdfs://host:port/file/path").
 * @return A DataSet that represents the data read from the given file as text lines.
 */
public DataSource<StringValue> readTextFileWithValue(String filePath) {
  Validate.notNull(filePath, "The file path may not be null.");

  final TextValueInputFormat valueFormat = new TextValueInputFormat(new Path(filePath));
  final ValueTypeInfo<StringValue> valueType = new ValueTypeInfo<StringValue>(StringValue.class);
  return new DataSource<StringValue>(this, valueFormat, valueType);
}

/**
 * Creates a DataSet that represents the text lines produced by reading the given file line by line.
 * This method is similar to {@link #readTextFile(String, String)}, but it produces a DataSet with
 * mutable {@link StringValue} objects, rather than Java Strings. StringValues can be used to tune
 * implementations to be less object and garbage collection heavy.
 * <p>
 * The {@link java.nio.charset.Charset} with the given name will be used to read the files.
 * 
 * @param filePath The path of the file, as a URI (e.g., "file:///some/local/file" or "hdfs://host:port/file/path").
 * @param charsetName The name of the character set used to read the file.
 * @param skipInvalidLines A flag to indicate whether to skip lines that cannot be read with the given character set.
 * 
 * @return A DataSet that represents the data read from the given file as text lines.
 */
public DataSource<StringValue> readTextFileWithValue(String filePath, String charsetName, boolean skipInvalidLines) {
  Validate.notNull(filePath, "The file path may not be null.");

  final Path inputPath = new Path(filePath);
  final TextValueInputFormat valueFormat = new TextValueInputFormat(inputPath);
  valueFormat.setCharsetName(charsetName);
  valueFormat.setSkipInvalidLines(skipInvalidLines);

  final ValueTypeInfo<StringValue> valueType = new ValueTypeInfo<StringValue>(StringValue.class);
  return new DataSource<StringValue>(this, valueFormat, valueType);
}

Javadoc

Creates a new data source.

Popular methods of DataSource

Popular in Java

Creating JSON documents from java classes using gson
getSupportFragmentManager (FragmentActivity)
scheduleAtFixedRate (ScheduledExecutorService)
setContentView (Activity)
BigInteger (java.math)
An immutable arbitrary-precision signed integer. FAST CRYPTOGRAPHY This implementation is efficient f
NoSuchElementException (java.util)
Thrown when trying to retrieve an element past the end of an Enumeration or Iterator.
TimerTask (java.util)
The TimerTask class represents a task to run at a specified time. The task may be run once or repeat
ConcurrentHashMap (java.util.concurrent)
A plug-in replacement for JDK1.5 java.util.concurrent.ConcurrentHashMap. This version is based on or
Collectors (java.util.stream)
ZipFile (java.util.zip)
This class provides random read access to a zip file. You pay more to read the zip file's central di
From CI to AI: The AI layer in your organization

How to use eu.stratosphere.api.java.operators.DataSource constructor

Best Java code snippets using eu.stratosphere.api.java.operators.DataSource.<init> (Showing top 20 results out of 315)

How to use
eu.stratosphere.api.java.operators.DataSource
constructor