/**
 * Discovers all shards of this file, delegating to the two-argument overload with
 * {@code Sleeper.DEFAULT} and a fresh backoff obtained from {@code BACK_OFF_FACTORY}.
 *
 * <p>Storage may be eventually consistent, so an attempt can observe no files at all, or fewer
 * files than the shard template implies. Such an attempt is treated as a failed read.
 *
 * @return whatever the two-argument overload returns for the discovered shards
 * @throws IOException propagated from the two-argument overload
 * @throws InterruptedException propagated from the two-argument overload
 */
public List<String> readFilesWithRetries() throws IOException, InterruptedException {
  final Sleeper defaultSleeper = Sleeper.DEFAULT;
  return readFilesWithRetries(defaultSleeper, BACK_OFF_FACTORY.backoff());
}
/** A single shard that was written with empty content must be read back as an empty result. */
@Test
public void testReadEmpty() throws Exception {
  // Create the only shard and give it zero-length content.
  File blankShard = tmpFolder.newFile("result-000-of-001");
  Files.write("", blankShard, StandardCharsets.UTF_8);

  assertThat(new NumberedShardedFile(filePattern).readFilesWithRetries(), empty());
}
@Test public void testReadCustomTemplate() throws Exception { String contents1 = "To be or not to be, ", contents2 = "it is not a question."; // Customized template: resultSSS-totalNNN File tmpFile1 = tmpFolder.newFile("result0-total2"); File tmpFile2 = tmpFolder.newFile("result1-total2"); Files.write(contents1, tmpFile1, StandardCharsets.UTF_8); Files.write(contents2, tmpFile2, StandardCharsets.UTF_8); Pattern customizedTemplate = Pattern.compile("(?x) result (?<shardnum>\\d+) - total (?<numshards>\\d+)"); NumberedShardedFile shardedFile = new NumberedShardedFile(filePattern, customizedTemplate); assertThat(shardedFile.readFilesWithRetries(), containsInAnyOrder(contents1, contents2)); }
/**
 * With no shard files created by this test, the read is expected to give up with an
 * {@link IOException} that reports the number of retries.
 */
@Test
public void testReadWithRetriesFailsWhenOutputDirEmpty() throws Exception {
  final NumberedShardedFile noShards = new NumberedShardedFile(filePattern);

  String retriesExhausted =
      "Unable to read file(s) after retrying " + NumberedShardedFile.MAX_READ_RETRIES;
  thrown.expect(IOException.class);
  thrown.expectMessage(containsString(retriesExhausted));

  // Must be the last statement: the expected exception is raised here.
  noShards.readFilesWithRetries(fastClock, backOff);
}
/**
 * When every call to {@code readLines} fails with an {@link IOException}, the read is expected
 * to give up with an {@link IOException} that reports the number of retries.
 */
@Test
public void testReadWithRetriesFailsSinceFilesystemError() throws Exception {
  File unreadableFile = tmpFolder.newFile();
  Files.write("Test for file checksum verifier.", unreadableFile, StandardCharsets.UTF_8);

  // Spy on a real instance so that only readLines is replaced by a failing stub.
  NumberedShardedFile failingReader = spy(new NumberedShardedFile(filePattern));
  doThrow(IOException.class).when(failingReader).readLines(anyCollection());

  String retriesExhausted =
      "Unable to read file(s) after retrying " + NumberedShardedFile.MAX_READ_RETRIES;
  thrown.expect(IOException.class);
  thrown.expectMessage(containsString(retriesExhausted));

  // Must be the last statement: the expected exception is raised here.
  failingReader.readFilesWithRetries(fastClock, backOff);
}
/**
 * A shard template that is compiled from the literal text {@code incorrect-template} is expected
 * to make the read give up with an {@link IOException} that reports the number of retries.
 */
@Test
public void testReadWithRetriesFailsWhenTemplateIncorrect() throws Exception {
  File presentFile = tmpFolder.newFile();
  Files.write("Test for file checksum verifier.", presentFile, StandardCharsets.UTF_8);

  Pattern wrongTemplate = Pattern.compile("incorrect-template");
  NumberedShardedFile mismatchedReader = new NumberedShardedFile(filePattern, wrongTemplate);

  String retriesExhausted =
      "Unable to read file(s) after retrying " + NumberedShardedFile.MAX_READ_RETRIES;
  thrown.expect(IOException.class);
  thrown.expectMessage(containsString(retriesExhausted));

  // Must be the last statement: the expected exception is raised here.
  mismatchedReader.readFilesWithRetries(fastClock, backOff);
}
/**
 * Inserts two rows into an external text table declared with the {@code lines} format, runs the
 * pipeline, and checks the values read back from the files under the table location.
 */
@Test
public void testWriteLines() throws Exception {
  File destinationFile = new File(tempFolder.getRoot(), "lines-outputs");
  String outputPath = destinationFile.getAbsolutePath();

  BeamSqlEnv env = BeamSqlEnv.inMemory(new TextTableProvider());
  String createTable =
      String.format(
          "CREATE EXTERNAL TABLE test %s TYPE text LOCATION '%s' TBLPROPERTIES '{\"format\":\"lines\"}'",
          SQL_LINES_SCHEMA, outputPath);
  env.executeDdl(createTable);

  BeamSqlRelUtils.toPCollection(
      pipeline, env.parseQuery("INSERT INTO test VALUES ('hello'), ('goodbye')"));
  pipeline.run();

  // Read the output back through a glob on the location; STOP_BACKOFF is passed as the backoff.
  NumberedShardedFile writtenShards = new NumberedShardedFile(outputPath + "*");
  assertThat(
      writtenShards.readFilesWithRetries(Sleeper.DEFAULT, BackOff.STOP_BACKOFF),
      containsInAnyOrder("hello", "goodbye"));
}
/**
 * Creates one file named like a shard plus a second file with a {@code tmp-} prefix, and expects
 * the read to give up with an {@link IOException} that reports the number of retries.
 */
@Test
public void testReadWithRetriesFailsWhenRedundantFileLoaded() throws Exception {
  // Only the files' existence matters here; nothing is written to them.
  tmpFolder.newFile("result-000-of-001");
  tmpFolder.newFile("tmp-result-000-of-001");

  final NumberedShardedFile withExtraFile = new NumberedShardedFile(filePattern);

  String retriesExhausted =
      "Unable to read file(s) after retrying " + NumberedShardedFile.MAX_READ_RETRIES;
  thrown.expect(IOException.class);
  thrown.expectMessage(containsString(retriesExhausted));

  // Must be the last statement: the expected exception is raised here.
  withExtraFile.readFilesWithRetries(fastClock, backOff);
}
}
@Test public void testWriteCsv() throws Exception { File destinationFile = new File(tempFolder.getRoot(), "csv-outputs"); BeamSqlEnv env = BeamSqlEnv.inMemory(new TextTableProvider()); // NumberedShardedFile env.executeDdl( String.format( "CREATE EXTERNAL TABLE test %s TYPE text LOCATION '%s' TBLPROPERTIES '{\"format\":\"csv\"}'", SQL_CSV_SCHEMA, destinationFile.getAbsolutePath())); BeamSqlRelUtils.toPCollection( pipeline, env.parseQuery("INSERT INTO test VALUES ('hello', 42), ('goodbye', 13)")); pipeline.run(); assertThat( new NumberedShardedFile(destinationFile.getAbsolutePath() + "*") .readFilesWithRetries(Sleeper.DEFAULT, BackOff.STOP_BACKOFF), containsInAnyOrder("hello,42", "goodbye,13")); } }
/**
 * Writes two shard files plus one unrelated file named {@code tmp}, points {@code filePattern}
 * at {@code result-*}, and expects only the two shard contents to be read back.
 */
@Test
public void testReadMultipleShards() throws Exception {
  String firstShardText = "To be or not to be, ";
  String secondShardText = "it is not a question.";
  String strayText = "should not be included";

  File firstShard = tmpFolder.newFile("result-000-of-002");
  File secondShard = tmpFolder.newFile("result-001-of-002");
  File strayFile = tmpFolder.newFile("tmp");
  Files.write(firstShardText, firstShard, StandardCharsets.UTF_8);
  Files.write(secondShardText, secondShard, StandardCharsets.UTF_8);
  Files.write(strayText, strayFile, StandardCharsets.UTF_8);

  // Overwrite the fixture's pattern with a glob on the "result-" prefix.
  filePattern =
      LocalResources.fromFile(tmpFolder.getRoot(), true)
          .resolve("result-*", StandardResolveOptions.RESOLVE_FILE)
          .toString();

  assertThat(
      new NumberedShardedFile(filePattern).readFilesWithRetries(),
      containsInAnyOrder(firstShardText, secondShardText));
}