@Test public void test() { FilePathFilter defaultFilter = FilePathFilter.createDefaultFilter(); Path path = new Path(filePath); assertEquals( String.format("File: %s", filePath), shouldFilter, defaultFilter.filterPath(path)); } }
/** * Reads the given file line-by-line and creates a data stream that contains a string with the * contents of each such line. The {@link java.nio.charset.Charset} with the given name will be * used to read the files. * * <p><b>NOTES ON CHECKPOINTING: </b> The source monitors the path, creates the * {@link org.apache.flink.core.fs.FileInputSplit FileInputSplits} to be processed, * forwards them to the downstream {@link ContinuousFileReaderOperator readers} to read the actual data, * and exits, without waiting for the readers to finish reading. This implies that no more checkpoint * barriers are going to be forwarded after the source exits, thus having no checkpoints after that point. * * @param filePath * The path of the file, as a URI (e.g., "file:///some/local/file" or "hdfs://host:port/file/path") * @param charsetName * The name of the character set used to read the file * @return The data stream that represents the data read from the given file as text lines */ public DataStreamSource<String> readTextFile(String filePath, String charsetName) { Preconditions.checkNotNull(filePath, "The file path must not be null."); Preconditions.checkNotNull(filePath.isEmpty(), "The file path must not be empty."); TextInputFormat format = new TextInputFormat(new Path(filePath)); format.setFilesFilter(FilePathFilter.createDefaultFilter()); TypeInformation<String> typeInfo = BasicTypeInfo.STRING_TYPE_INFO; format.setCharsetName(charsetName); return readFile(format, filePath, FileProcessingMode.PROCESS_ONCE, -1, typeInfo); }
@Override public void testProgram(StreamExecutionEnvironment env) { // set the restart strategy. env.getConfig().setRestartStrategy(RestartStrategies.fixedDelayRestart(NO_OF_RETRIES, 0)); env.enableCheckpointing(10); // create and start the file creating thread. fc = new FileCreator(); fc.start(); // create the monitoring source along with the necessary readers. TextInputFormat format = new TextInputFormat(new org.apache.flink.core.fs.Path(localFsURI)); format.setFilesFilter(FilePathFilter.createDefaultFilter()); DataStream<String> inputStream = env.readFile(format, localFsURI, FileProcessingMode.PROCESS_CONTINUOUSLY, INTERVAL); TestingSinkFunction sink = new TestingSinkFunction(); inputStream.flatMap(new FlatMapFunction<String, String>() { @Override public void flatMap(String value, Collector<String> out) throws Exception { out.collect(value); } }).addSink(sink).setParallelism(1); }
/** * Reads the given file line-by-line and creates a data stream that contains a string with the * contents of each such line. The {@link java.nio.charset.Charset} with the given name will be * used to read the files. * * <p><b>NOTES ON CHECKPOINTING: </b> The source monitors the path, creates the * {@link org.apache.flink.core.fs.FileInputSplit FileInputSplits} to be processed, * forwards them to the downstream {@link ContinuousFileReaderOperator readers} to read the actual data, * and exits, without waiting for the readers to finish reading. This implies that no more checkpoint * barriers are going to be forwarded after the source exits, thus having no checkpoints after that point. * * @param filePath * The path of the file, as a URI (e.g., "file:///some/local/file" or "hdfs://host:port/file/path") * @param charsetName * The name of the character set used to read the file * @return The data stream that represents the data read from the given file as text lines */ public DataStreamSource<String> readTextFile(String filePath, String charsetName) { Preconditions.checkNotNull(filePath, "The file path must not be null."); Preconditions.checkNotNull(filePath.isEmpty(), "The file path must not be empty."); TextInputFormat format = new TextInputFormat(new Path(filePath)); format.setFilesFilter(FilePathFilter.createDefaultFilter()); TypeInformation<String> typeInfo = BasicTypeInfo.STRING_TYPE_INFO; format.setCharsetName(charsetName); return readFile(format, filePath, FileProcessingMode.PROCESS_ONCE, -1, typeInfo); }
/** * Reads the given file line-by-line and creates a data stream that contains a string with the * contents of each such line. The {@link java.nio.charset.Charset} with the given name will be * used to read the files. * * <p><b>NOTES ON CHECKPOINTING: </b> The source monitors the path, creates the * {@link org.apache.flink.core.fs.FileInputSplit FileInputSplits} to be processed, * forwards them to the downstream {@link ContinuousFileReaderOperator readers} to read the actual data, * and exits, without waiting for the readers to finish reading. This implies that no more checkpoint * barriers are going to be forwarded after the source exits, thus having no checkpoints after that point. * * @param filePath * The path of the file, as a URI (e.g., "file:///some/local/file" or "hdfs://host:port/file/path") * @param charsetName * The name of the character set used to read the file * @return The data stream that represents the data read from the given file as text lines */ public DataStreamSource<String> readTextFile(String filePath, String charsetName) { Preconditions.checkNotNull(filePath, "The file path must not be null."); Preconditions.checkNotNull(filePath.isEmpty(), "The file path must not be empty."); TextInputFormat format = new TextInputFormat(new Path(filePath)); format.setFilesFilter(FilePathFilter.createDefaultFilter()); TypeInformation<String> typeInfo = BasicTypeInfo.STRING_TYPE_INFO; format.setCharsetName(charsetName); return readFile(format, filePath, FileProcessingMode.PROCESS_ONCE, -1, typeInfo); }
/** * Reads the given file line-by-line and creates a data stream that contains a string with the * contents of each such line. The {@link java.nio.charset.Charset} with the given name will be * used to read the files. * * <p><b>NOTES ON CHECKPOINTING: </b> The source monitors the path, creates the * {@link org.apache.flink.core.fs.FileInputSplit FileInputSplits} to be processed, * forwards them to the downstream {@link ContinuousFileReaderOperator readers} to read the actual data, * and exits, without waiting for the readers to finish reading. This implies that no more checkpoint * barriers are going to be forwarded after the source exits, thus having no checkpoints after that point. * * @param filePath * The path of the file, as a URI (e.g., "file:///some/local/file" or "hdfs://host:port/file/path") * @param charsetName * The name of the character set used to read the file * @return The data stream that represents the data read from the given file as text lines */ public DataStreamSource<String> readTextFile(String filePath, String charsetName) { Preconditions.checkNotNull(filePath, "The file path must not be null."); Preconditions.checkNotNull(filePath.isEmpty(), "The file path must not be empty."); TextInputFormat format = new TextInputFormat(new Path(filePath)); format.setFilesFilter(FilePathFilter.createDefaultFilter()); TypeInformation<String> typeInfo = BasicTypeInfo.STRING_TYPE_INFO; format.setCharsetName(charsetName); return readFile(format, filePath, FileProcessingMode.PROCESS_ONCE, -1, typeInfo); }
/** * Reads the given file line-by-line and creates a data stream that contains a string with the * contents of each such line. The {@link java.nio.charset.Charset} with the given name will be * used to read the files. * * <p><b>NOTES ON CHECKPOINTING: </b> The source monitors the path, creates the * {@link org.apache.flink.core.fs.FileInputSplit FileInputSplits} to be processed, * forwards them to the downstream {@link ContinuousFileReaderOperator readers} to read the actual data, * and exits, without waiting for the readers to finish reading. This implies that no more checkpoint * barriers are going to be forwarded after the source exits, thus having no checkpoints after that point. * * @param filePath * The path of the file, as a URI (e.g., "file:///some/local/file" or "hdfs://host:port/file/path") * @param charsetName * The name of the character set used to read the file * @return The data stream that represents the data read from the given file as text lines */ public DataStreamSource<String> readTextFile(String filePath, String charsetName) { Preconditions.checkNotNull(filePath, "The file path must not be null."); Preconditions.checkNotNull(filePath.isEmpty(), "The file path must not be empty."); TextInputFormat format = new TextInputFormat(new Path(filePath)); format.setFilesFilter(FilePathFilter.createDefaultFilter()); TypeInformation<String> typeInfo = BasicTypeInfo.STRING_TYPE_INFO; format.setCharsetName(charsetName); return readFile(format, filePath, FileProcessingMode.PROCESS_ONCE, -1, typeInfo); }