/**
 * Generic method to create an input data stream with {@link org.apache.flink.api.common.io.InputFormat}.
 *
 * <p>Since all data streams need specific information about their types, this method needs to determine the
 * type of the data produced by the input format. It will attempt to determine the data type by reflection,
 * unless the input format implements the {@link org.apache.flink.api.java.typeutils.ResultTypeQueryable}
 * interface. In the latter case, this method will invoke the
 * {@link org.apache.flink.api.java.typeutils.ResultTypeQueryable#getProducedType()} method to determine the
 * data type produced by the input format.
 *
 * <p><b>NOTES ON CHECKPOINTING:</b> In the case of a {@link FileInputFormat}, the source
 * (which executes the {@link ContinuousFileMonitoringFunction}) monitors the path, creates the
 * {@link org.apache.flink.core.fs.FileInputSplit FileInputSplits} to be processed, forwards
 * them to the downstream {@link ContinuousFileReaderOperator} to read the actual data, and exits
 * without waiting for the readers to finish reading. This implies that no more checkpoint
 * barriers are forwarded after the source exits, so no further checkpoints can complete.
 *
 * @param inputFormat The input format used to create the data stream
 * @param <OUT> The type of the returned data stream
 * @return The data stream that represents the data created by the input format
 */
@PublicEvolving
public <OUT> DataStreamSource<OUT> createInput(InputFormat<OUT, ?> inputFormat) {
    return createInput(inputFormat, TypeExtractor.getInputFormatTypes(inputFormat));
}
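// A minimal usage sketch (the file path is an assumption): TextInputFormat does not
// implement ResultTypeQueryable, so the one-argument createInput above falls back to
// reflective type extraction and yields a DataStreamSource<String>.
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
TextInputFormat format = new TextInputFormat(new Path("/path/to/input.txt"));
DataStreamSource<String> lines = env.createInput(format);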
private static SingleOutputStreamOperator<Order> getOrdersDataStream(StreamExecutionEnvironment env, String ordersPath, boolean useSourceV2) {
    // The batch CsvReader is used only to build a TupleCsvInputFormat; the stream itself is created on 'env'.
    final CsvReader csvReader = new CsvReader(ordersPath, ExecutionEnvironment.getExecutionEnvironment())
        .fieldDelimiter("|")
        .includeFields("110010010");
    final TupleCsvInputFormat<Order> inputFormat = csvReader.generateTupleCsvInputFormat(Order.class);
    if (useSourceV2) {
        return env.createInputV2(inputFormat, inputFormat.getTupleTypeInfo(), "Order source v2");
    } else {
        return env.createInput(inputFormat, inputFormat.getTupleTypeInfo(), "Order source v1");
    }
}
            FileProcessingMode.PROCESS_ONCE, -1);
} else {
    source = createInput(inputFormat, typeInfo, "Custom Source");
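// For context, the fragment above is the tail of StreamExecutionEnvironment#createInput(InputFormat, TypeInformation).
// A sketch of the surrounding method, based on the Flink source this snippet appears to come from
// (details may differ between Flink versions):
@PublicEvolving
public <OUT> DataStreamSource<OUT> createInput(InputFormat<OUT, ?> inputFormat, TypeInformation<OUT> typeInfo) {
    DataStreamSource<OUT> source;
    if (inputFormat instanceof FileInputFormat) {
        @SuppressWarnings("unchecked")
        FileInputFormat<OUT> format = (FileInputFormat<OUT>) inputFormat;
        // File formats go through the continuous-file-monitoring source, here in one-shot mode.
        source = createFileInput(format, typeInfo, "Custom File source",
                FileProcessingMode.PROCESS_ONCE, -1);
    } else {
        // All other formats are read by the generic input-format source.
        source = createInput(inputFormat, typeInfo, "Custom Source");
    }
    return source;
}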
private static SingleOutputStreamOperator<Lineitem> getLineitemDataStream(StreamExecutionEnvironment env, String lineitemPath, boolean useSourceV2) {
    // Same pattern as the orders source: CsvReader only supplies the input format.
    final CsvReader csvReader = new CsvReader(lineitemPath, ExecutionEnvironment.getExecutionEnvironment())
        .fieldDelimiter("|")
        .includeFields("1000011000100000");
    final TupleCsvInputFormat<Lineitem> inputFormat = csvReader.generateTupleCsvInputFormat(Lineitem.class);
    if (useSourceV2) {
        return env.createInputV2(inputFormat, inputFormat.getTupleTypeInfo(), "Lineitem source v2");
    } else {
        return env.createInput(inputFormat, inputFormat.getTupleTypeInfo(), "Lineitem source v1");
    }
}
@Override
public DataStream<BaseRow> getBoundedStream(StreamExecutionEnvironment streamEnv) {
    try {
        // Prefer the pruned partition list when partition pruning produced one; otherwise read all partitions.
        List<Partition> partitionList;
        if (prunedPartitions == null || prunedPartitions.isEmpty()) {
            partitionList = allPartitions;
        } else {
            partitionList = prunedPartitions;
        }
        return streamEnv.createInput(
            new HiveTableInputFormat.Builder(rowTypeInfo, jobConf, dbName, tableName,
                isPartitionTable, partitionColNames, partitionList).build())
            .name(explainSource());
    } catch (Exception e) {
        logger.error("Failed to create HiveTableInputFormat.", e);
        throw new RuntimeException(e);
    }
}
private static SingleOutputStreamOperator<Customer> getCustomerDataStream(StreamExecutionEnvironment env, String customerPath, boolean useSourceV2) {
    final CsvReader csvReader = new CsvReader(customerPath, ExecutionEnvironment.getExecutionEnvironment())
        .fieldDelimiter("|")
        .includeFields("10000010");
    final TupleCsvInputFormat<Customer> inputFormat = csvReader.generateTupleCsvInputFormat(Customer.class);
    if (useSourceV2) {
        return env.createInputV2(inputFormat, inputFormat.getTupleTypeInfo(), "Customer source v2");
    } else {
        return env.createInput(inputFormat, inputFormat.getTupleTypeInfo(), "Customer source v1");
    }
}
.createInput(csvInput, pojoType)
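// A minimal sketch of the context this call typically appears in: reading a CSV file into a
// POJO-typed stream. The Person POJO, field order, and file path are hypothetical.
PojoTypeInfo<Person> pojoType = (PojoTypeInfo<Person>) TypeExtractor.createTypeInfo(Person.class);
String[] fieldOrder = new String[]{"name", "age"}; // CSV column -> POJO field mapping (assumed)
PojoCsvInputFormat<Person> csvInput = new PojoCsvInputFormat<>(
        new Path("/path/to/people.csv"), pojoType, fieldOrder);
DataStream<Person> people = env.createInput(csvInput, pojoType);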