/**
 * Declare a stream containing the absolute pathname of
 * newly created file names from watching {@code directory}.
 * <p>
 * This is the same as {@code directoryWatcher(te, directory, null)}.
 *
 * @param te topology element whose topology the watcher will be added to
 * @param directory
 *            Name of the directory to watch.
 * @return Stream containing absolute pathnames of newly created files in
 *            {@code directory}.
 */
public static TStream<String> directoryWatcher(TopologyElement te, Supplier<String> directory) {
    // Delegate to the general form with no transformer supplied.
    TStream<String> pathnames = directoryWatcher(te, directory, null);
    return pathnames;
}
return textFileReader(pathnames, null, null);
/**
 * Write the contents of a stream to files.
 * <p>
 * The default {@link FileWriterPolicy} is used.
 * <p>
 * This is the same as {@code textFileWriter(contents, basePathname, null)}.
 * <p>
 * Sample use:
 * <pre>{@code
 * // write a stream of LogEvent to files, using the default
 * // file writer policy
 * String basePathname = "/myLogDir/LOG"; // yield LOG_YYYYMMDD_HHMMSS
 * TStream<MyLogEvent> events = ...
 * TStream<String> stringEvents = events.map(event -> event.toString());
 * FileStreams.textFileWriter(stringEvents, () -> basePathname);
 * }</pre>
 * @param contents the lines to write
 * @param basePathname the base pathname of the created files
 * @return a TSink
 */
public static TSink<String> textFileWriter(TStream<String> contents, Supplier<String> basePathname) {
    // A null policy selects the default FileWriterPolicy.
    TSink<String> sink = textFileWriter(contents, basePathname, null);
    return sink;
}
FileWriterRetentionConfig.newFileCountBasedConfig(10) ); FileStreams.textFileWriter(contents, () -> basePath.toString(), () -> policy); TStream<String> pathnames = FileStreams.directoryWatcher(t, () -> dir.toAbsolutePath().toString()) .peek(tuple -> System.out.println(new Date() + " watcher added "+tuple)) .peek(tuple -> { if (new File(tuple).getName().startsWith(".")) throw new RuntimeException("Not filtering active/hidden files "+tuple); }); TStream<String> readContents = FileStreams.textFileReader(pathnames);
@Test public void testNoFilesCreated() throws Exception { // complete before any files are generated Topology t = newTopology("testNoFilesCreated"); // establish a base path Path basePath = createTempFile("test1", "txt", new String[0]); // build expected results List<List<String>> expResults = Collections.emptyList(); TStream<String> s = t.events(eventSetup -> { /* no tuples generated */ }); FileStreams.textFileWriter(s, () -> basePath.toString()); completeAndValidateWriter(t, TMO_SEC, basePath, expResults); }
@Test
public void testTextFileReader() throws Exception {
    Topology t = newTopology("testTextFileReader");

    String[] lines = getLines();
    String[] ucLines = toUpperCase(lines);
    String[] allLines = concat(lines, ucLines);

    // One file holding the original lines, a second with the uppercased ones.
    Path tempFile1 = FileUtil.createTempFile("test1", "txt", lines);
    Path tempFile2 = FileUtil.createTempFile("test2", "txt", ucLines);

    String path1 = tempFile1.toAbsolutePath().toString();
    String path2 = tempFile2.toAbsolutePath().toString();
    TStream<String> contents = FileStreams.textFileReader(t.strings(path1, path2));

    try {
        completeAndValidate("", t, contents, 10, allLines);
    } finally {
        tempFile1.toFile().delete();
        tempFile2.toFile().delete();
    }
}
TStream<String> fileNames = FileStreams.directoryWatcher(t, () -> dir.toAbsolutePath().toString());
@Test public void testFlushTupleBased() throws Exception { Topology t = newTopology("testFlushTupleBased"); // establish a base path Path basePath = createTempFile("test1", "txt", new String[0]); String[] lines = getLines(); // build expected results // net all in one, the first, file List<List<String>> expResults = buildExpResults(lines, tuple -> false); TStream<String> s = t.strings(lines); IFileWriterPolicy<String> policy = new FileWriterPolicy<String>( FileWriterFlushConfig.newPredicateBasedConfig( tuple -> tuple.startsWith("1-") || tuple.startsWith("3-")), FileWriterCycleConfig.newCountBasedConfig(expResults.get(0).size()), // all in 1 file FileWriterRetentionConfig.newFileCountBasedConfig(10) ); FileStreams.textFileWriter(s, () -> basePath.toString(), () -> policy); completeAndValidateWriter(t, TMO_SEC, basePath, expResults); }
@Test public void testTextFileReaderProblemPaths() throws Exception { Topology t = newTopology("testTextFileReaderProblemPaths"); String[] lines = getLines(); String[] ucLines = toUpperCase(lines); String[] allLines = concat(lines, ucLines); Path tempFile1 = FileUtil.createTempFile("test1", "txt", lines); Path tempFile2 = FileUtil.createTempFile("test2", "txt", ucLines); // ensure a problem in one file (tuple) doesn't affect others. // The problem files should result in a log entry but otherwise be ignored. TStream<String> contents = FileStreams.textFileReader( t.strings(tempFile1.toAbsolutePath().toString(), "/no-such-file", "/tmp", tempFile2.toAbsolutePath().toString())); try { completeAndValidate("", t, contents, 10, allLines); } finally { tempFile1.toFile().delete(); tempFile2.toFile().delete(); } }
@Test public void testCycleTupleBased() throws Exception { Topology t = newTopology("testCycleTupleBased"); // establish a base path Path basePath = createTempFile("test1", "txt", new String[0]); String[] lines = getLines(); // build expected results // a tuple based config / predicate. in this case should end up with 3 files. // flush on the last tuple too to ensure the test completes before TMO. Predicate<String> cycleIt = tuple -> tuple.startsWith("1-") || tuple.startsWith("3-") || tuple.equals(lines[lines.length-1]); List<List<String>> expResults = buildExpResults(lines, cycleIt); assertEquals(3, expResults.size()); TStream<String> s = t.strings(lines); IFileWriterPolicy<String> policy = new FileWriterPolicy<String>( FileWriterFlushConfig.newImplicitConfig(), FileWriterCycleConfig.newPredicateBasedConfig(cycleIt), FileWriterRetentionConfig.newFileCountBasedConfig(10) ); FileStreams.textFileWriter(s, () -> basePath.toString(), () -> policy); completeAndValidateWriter(t, TMO_SEC, basePath, expResults); }
allLines.add(postFn.apply(tempFile2.toAbsolutePath().toString(), null)); TStream<String> contents = FileStreams.textFileReader( t.strings(tempFile1.toAbsolutePath().toString(), noSuchFilePath,
@Test public void testFlushImplicit() throws Exception { Topology t = newTopology("testFlushImplicit"); // establish a base path Path basePath = createTempFile("test1", "txt", new String[0]); String[] lines = getLines(); // build expected results // net all in one, the first, file List<List<String>> expResults = buildExpResults(lines, tuple -> false); TStream<String> s = t.strings(lines); IFileWriterPolicy<String> policy = new FileWriterPolicy<String>( FileWriterFlushConfig.newImplicitConfig(), FileWriterCycleConfig.newCountBasedConfig(expResults.get(0).size()), FileWriterRetentionConfig.newFileCountBasedConfig(10) ); FileStreams.textFileWriter(s, () -> basePath.toString(), () -> policy); completeAndValidateWriter(t, TMO_SEC, basePath, expResults); }
@Test public void testFlushCntBased() throws Exception { Topology t = newTopology("testFlushCntBased"); // establish a base path Path basePath = createTempFile("test1", "txt", new String[0]); String[] lines = getLines(); // build expected results // net all in one, the first, file List<List<String>> expResults = buildExpResults(lines, tuple -> false); TStream<String> s = t.strings(lines); IFileWriterPolicy<String> policy = new FileWriterPolicy<String>( FileWriterFlushConfig.newCountBasedConfig(1), // every tuple FileWriterCycleConfig.newCountBasedConfig(expResults.get(0).size()), // all in 1 file FileWriterRetentionConfig.newFileCountBasedConfig(10) ); FileStreams.textFileWriter(s, () -> basePath.toString(), () -> policy); completeAndValidateWriter(t, TMO_SEC, basePath, expResults); }
@Test public void testRetainAggSizeBased() throws Exception { // more aggsize than configured; only keep aggsize worth Topology t = newTopology("testRetainAggSizeBased"); // establish a base path Path basePath = createTempFile("test1", "txt", new String[0]); String[] lines = getLines(); // build expected results // net one tuple per file List<List<String>> expResults = buildExpResults(lines, tuple -> true); // agg size only enough for last two lines long aggregateFileSize = 2 * (("1-"+getStr()).getBytes(StandardCharsets.UTF_8).length + 1/*eol*/); expResults.remove(0); expResults.remove(0); assertEquals(2, expResults.size()); TStream<String> s = t.strings(lines); IFileWriterPolicy<String> policy = new FileWriterPolicy<String>( FileWriterFlushConfig.newImplicitConfig(), FileWriterCycleConfig.newCountBasedConfig(1), FileWriterRetentionConfig.newAggregateFileSizeBasedConfig(aggregateFileSize) ); FileStreams.textFileWriter(s, () -> basePath.toString(), () -> policy); completeAndValidateWriter(t, TMO_SEC, basePath, expResults); }
@Test public void testAllTimeBased() throws Exception { // exercise case with multiple timer based policies Topology t = newTopology("testAllTimeBased"); // establish a base path Path basePath = createTempFile("test1", "txt", new String[0]); String[] lines = getLines(); // build expected results // keep all given age and TMO_SEC int ageSec = 10; long periodMsec = TimeUnit.SECONDS.toMillis(1); // net one tuple per file List<List<String>> expResults = buildExpResults(lines, tuple -> true); TStream<String> s = t.strings(lines); IFileWriterPolicy<String> policy = new FileWriterPolicy<String>( FileWriterFlushConfig.newTimeBasedConfig(TimeUnit.MILLISECONDS.toMillis(250)), FileWriterCycleConfig.newConfig(1, 2000, TimeUnit.SECONDS.toMillis(10), null), FileWriterRetentionConfig.newAgeBasedConfig(ageSec, periodMsec) ); FileStreams.textFileWriter(s, () -> basePath.toString(), () -> policy); completeAndValidateWriter(t, TMO_SEC, basePath, expResults); }
@Test public void testCompressedFileWriterPolicy() throws Exception { Topology t = newTopology("testCompressedFileWriterPolicy"); // establish a base path Path basePath = createTempFile("test1", "txt", new String[0]); String[] lines = getLines(); // build expected results // net 2 tuples per file int cntTuples = 2; AtomicInteger cnt = new AtomicInteger(); Predicate<String> cycleIt = tuple -> cnt.incrementAndGet() % cntTuples == 0; List<List<String>> expResults = buildExpResults(lines, cycleIt); assertEquals(lines.length / cntTuples, expResults.size()); TStream<String> s = t.strings(lines); IFileWriterPolicy<String> policy = new CompressedFileWriterPolicy<String>( FileWriterFlushConfig.newImplicitConfig(), FileWriterCycleConfig.newCountBasedConfig(cntTuples), FileWriterRetentionConfig.newFileCountBasedConfig(10) ); FileStreams.textFileWriter(s, () -> basePath.toString(), () -> policy); completeAndValidateWriter(t, TMO_SEC, basePath, expResults); }
@Test public void testOneFileCreated() throws Exception { // all lines into a single (the first) file Topology t = newTopology("testOneFileCreated"); // establish a base path Path basePath = createTempFile("test1", "txt", new String[0]); String[] lines = getLines(); // build expected results // net all in one, the first, file List<List<String>> expResults = buildExpResults(lines, tuple -> false); assertEquals(1, expResults.size()); TStream<String> s = t.strings(lines); // default writer policy TSink<String> sink = FileStreams.textFileWriter(s, () -> basePath.toString()); // note, with only 4 tuples, default policy won't cycle (finalize the cur file) // to make the expResults present until job stops (TMO) completeAndValidateWriter(t, TMO_SEC, basePath, expResults); assertNotNull(sink); }
@Test public void testFlushTimeBased() throws Exception { Topology t = newTopology("testFlushTimeBased"); // establish a base path Path basePath = createTempFile("test1", "txt", new String[0]); String[] lines = getLines(); // build expected results // net all in one, the first, file List<List<String>> expResults = buildExpResults(lines, tuple -> false); // add delay so time flush happens int throttleSec = 1; TStream<String> s = PlumbingStreams.blockingThrottle( t.strings(lines), throttleSec, TimeUnit.SECONDS); IFileWriterPolicy<String> policy = new FileWriterPolicy<String>( FileWriterFlushConfig.newTimeBasedConfig(TimeUnit.MILLISECONDS.toMillis(250)), FileWriterCycleConfig.newCountBasedConfig(expResults.get(0).size()), // all in 1 file FileWriterRetentionConfig.newFileCountBasedConfig(10) ); FileStreams.textFileWriter(s, () -> basePath.toString(), () -> policy); completeAndValidateWriter(t, (lines.length*throttleSec)+TMO_SEC, basePath, expResults); }
@Test public void testCycleCntBased() throws Exception { Topology t = newTopology("testCycleCntBased"); // establish a base path Path basePath = createTempFile("test1", "txt", new String[0]); String[] lines = getLines(); // build expected results // net two tuples per file int cntTuples = 2; AtomicInteger cnt = new AtomicInteger(); Predicate<String> cycleIt = tuple -> cnt.incrementAndGet() % cntTuples == 0; List<List<String>> expResults = buildExpResults(lines, cycleIt); assertEquals(lines.length / cntTuples, expResults.size()); TStream<String> s = t.strings(lines); IFileWriterPolicy<String> policy = new FileWriterPolicy<String>( FileWriterFlushConfig.newImplicitConfig(), FileWriterCycleConfig.newCountBasedConfig(cntTuples), FileWriterRetentionConfig.newFileCountBasedConfig(10) ); FileStreams.textFileWriter(s, () -> basePath.toString(), () -> policy); completeAndValidateWriter(t, TMO_SEC, basePath, expResults); }
@Test public void testRetainCntBased() throws Exception { // more lines than configured retained numFiles; only keep the last numFiles Topology t = newTopology("testRetainCntBased"); // establish a base path Path basePath = createTempFile("test1", "txt", new String[0]); String[] lines = getLines(); // build expected results // net one tuples per file List<List<String>> expResults = buildExpResults(lines, tuple -> true); int keepCnt = 2; // only keep the last n files for (int i = 0; i < keepCnt; i++) expResults.remove(0); assertEquals(keepCnt, expResults.size()); TStream<String> s = t.strings(lines); IFileWriterPolicy<String> policy = new FileWriterPolicy<String>( FileWriterFlushConfig.newImplicitConfig(), FileWriterCycleConfig.newCountBasedConfig(1), FileWriterRetentionConfig.newFileCountBasedConfig(keepCnt) ); FileStreams.textFileWriter(s, () -> basePath.toString(), () -> policy); completeAndValidateWriter(t, TMO_SEC, basePath, expResults); }