/**
 * Creates an object in GCS and opens it for writing.
 *
 * <p>This overload delegates to the three-argument {@code create}, passing
 * {@code uploadBufferSizeBytes} as the third argument.
 *
 * @param path the GCS file to write to
 * @param type the type of object, eg "text/plain".
 * @return a {@link WritableByteChannel} that can be used to write data to the object.
 * @throws IOException if the delegated {@code create} call throws it
 */
public WritableByteChannel create(GcsPath path, String type) throws IOException {
  final WritableByteChannel channel = create(path, type, uploadBufferSizeBytes);
  return channel;
}
/** * Creates the {@link OutputStream} for the output file either on GCS or on * local FS (for testing). * * @param outputPath The full path of the output file. * @param c The {@link org.apache.beam.sdk.transforms.DoFn.ProcessContext} * @return An {@link OutputStream} for the opened output file. * @throws IOException if the output file cannot be opened. */ private OutputStream createOutputStream(Path outputPath, ProcessContext c) throws IOException { if (GcsPath.GCS_URI.matcher(outputPath.toString()).matches()) { // Writing the Avro file to GCS. org.apache.beam.sdk.util.GcsUtil gcsUtil = c.getPipelineOptions().as(GcsOptions.class).getGcsUtil(); String gcsType = "application/octet-stream"; WritableByteChannel gcsChannel = gcsUtil.create((GcsPath) outputPath, gcsType); return Channels.newOutputStream(gcsChannel); } else { // Avro file is created on local filesystem (for testing). return Files.newOutputStream(outputPath); } } })
/**
 * Builds a Mockito mock of {@link GcsUtil} with two stubbed methods.
 *
 * <ul>
 *   <li>{@code create(GcsPath, String)} answers every call with a writable channel over a newly
 *       created temporary file that is deleted when the channel is closed.
 *   <li>{@code expand(GcsPath)} answers with a one-element list holding the path it was given.
 * </ul>
 *
 * @return the stubbed mock
 * @throws IOException declared because the stubbed {@code create} call declares it
 */
private GcsUtil buildMockGcsUtil() throws IOException {
  GcsUtil mockGcsUtil = mock(GcsUtil.class);

  when(mockGcsUtil.create(any(GcsPath.class), anyString()))
      .then(
          invocation ->
              // WRITE must be requested explicitly: when none of READ/WRITE/APPEND is given,
              // FileChannel.open opens the file for reading only, and writing through the
              // returned WritableByteChannel would throw NonWritableChannelException.
              FileChannel.open(
                  Files.createTempFile("channel-", ".tmp"),
                  StandardOpenOption.CREATE,
                  StandardOpenOption.WRITE,
                  StandardOpenOption.DELETE_ON_CLOSE));

  when(mockGcsUtil.expand(any(GcsPath.class)))
      .then(invocation -> ImmutableList.of((GcsPath) invocation.getArguments()[0]));

  return mockGcsUtil;
}
// Name the marker object by appending "-EOF" to the configured output location, then open an
// OutputStream over the GCS write channel created for that object with FILE_MIME_TYPE.
// NOTE(review): the code that writes to and closes `os` is outside this excerpt - confirm the
// stream is closed on every path.
String eofFileName = options.getOutput() + "-EOF"; final OutputStream os = Channels.newOutputStream( (new GcsUtil.GcsUtilFactory()).create(options).create( GcsPath.fromUri(eofFileName), FILE_MIME_TYPE));
/**
 * Opens a write channel for the given GCS resource.
 *
 * <p>When {@code createOptions} is a {@link GcsCreateOptions}, its
 * {@code gcsUploadBufferSizeBytes()} value is forwarded to the three-argument
 * {@code GcsUtil.create}; otherwise the two-argument overload is used.
 *
 * @param resourceId the resource whose GCS path is written to
 * @param createOptions supplies the MIME type and, for {@link GcsCreateOptions}, the upload
 *     buffer size
 * @return the channel returned by {@code GcsUtil.create}
 * @throws IOException if the underlying {@code create} call throws it
 */
@Override
protected WritableByteChannel create(GcsResourceId resourceId, CreateOptions createOptions)
    throws IOException {
  if (!(createOptions instanceof GcsCreateOptions)) {
    // Generic options carry no GCS-specific buffer size.
    return options.getGcsUtil().create(resourceId.getGcsPath(), createOptions.mimeType());
  }

  GcsCreateOptions gcsCreateOptions = (GcsCreateOptions) createOptions;
  return options
      .getGcsUtil()
      .create(
          resourceId.getGcsPath(),
          createOptions.mimeType(),
          gcsCreateOptions.gcsUploadBufferSizeBytes());
}
// Name the marker object by appending "-EOF" to the configured output location, then open an
// OutputStream over the GCS write channel created for that object with FILE_MIME_TYPE.
// NOTE(review): the code that writes to and closes `os` is outside this excerpt - confirm the
// stream is closed on every path.
String eofFileName = options.getOutput() + "-EOF"; final OutputStream os = Channels.newOutputStream( (new GcsUtil.GcsUtilFactory()).create(options).create( GcsPath.fromUri(eofFileName), FILE_MIME_TYPE));
/**
 * Checks that the output location for single-file test results can be written, and seeds it
 * with a sentinel value.
 *
 * <p>The sentinel protects against the possibility of prior test output causing a newly
 * failing test to appear to succeed.
 *
 * @param outputPath URI of the GCS object to create, parsed with {@code GcsPath.fromUri}
 * @throws IOException if creating the object or writing the sentinel fails
 */
public void touchOutput(String outputPath) throws IOException {
  GcsPath target = GcsPath.fromUri(outputPath);
  try (Writer sentinelWriter =
      Channels.newWriter(gcsUtil.create(target, "text/plain"), "UTF-8")) {
    sentinelWriter.write("output will go here");
  }
}
/**
 * Staging is expected to end in a {@link RuntimeException} when every {@code create} call on
 * the mocked GcsUtil throws; the mock must have seen exactly five {@code create} attempts.
 */
@Test(expected = RuntimeException.class)
public void testPackageUploadFailsWhenIOExceptionThrown() throws Exception {
  File fileToStage = makeFileWithContents("file.txt", "This is a test!");

  // The existence lookup reports that nothing is staged yet.
  ImmutableList<StorageObjectOrIOException> lookupResult =
      ImmutableList.of(StorageObjectOrIOException.create(new FileNotFoundException("some/path")));
  when(mockGcsUtil.getObjects(anyListOf(GcsPath.class))).thenReturn(lookupResult);

  // Every upload attempt fails.
  when(mockGcsUtil.create(any(GcsPath.class), anyString()))
      .thenThrow(new IOException("Fake Exception: Upload error"));

  try (PackageUtil stager =
      PackageUtil.withExecutorService(MoreExecutors.newDirectExecutorService())) {
    ImmutableList<String> classpath = ImmutableList.of(fileToStage.getAbsolutePath());
    stager.stageClasspathElements(classpath, STAGING_PATH, fastNanoClockAndSleeper, createOptions);
  } finally {
    verify(mockGcsUtil).getObjects(anyListOf(GcsPath.class));
    verify(mockGcsUtil, times(5)).create(any(GcsPath.class), anyString());
    verifyNoMoreInteractions(mockGcsUtil);
  }
}
@Test public void testPackageUploadEventuallySucceeds() throws Exception { Pipe pipe = Pipe.open(); File tmpFile = makeFileWithContents("file.txt", "This is a test!"); when(mockGcsUtil.getObjects(anyListOf(GcsPath.class))) .thenReturn( ImmutableList.of( StorageObjectOrIOException.create(new FileNotFoundException("some/path")))); when(mockGcsUtil.create(any(GcsPath.class), anyString())) .thenThrow(new IOException("Fake Exception: 410 Gone")) // First attempt fails .thenReturn(pipe.sink()); // second attempt succeeds try (PackageUtil directPackageUtil = PackageUtil.withExecutorService(MoreExecutors.newDirectExecutorService())) { directPackageUtil.stageClasspathElements( ImmutableList.of(tmpFile.getAbsolutePath()), STAGING_PATH, fastNanoClockAndSleeper, createOptions); } finally { verify(mockGcsUtil).getObjects(anyListOf(GcsPath.class)); verify(mockGcsUtil, times(2)).create(any(GcsPath.class), anyString()); verifyNoMoreInteractions(mockGcsUtil); } }
/**
 * When the lookup returns an object created with size {@code Long.MAX_VALUE}, staging a
 * directory must still call {@code create} once on the mocked GcsUtil.
 */
@Test
public void testPackageUploadIsNotSkippedWhenSizesAreDifferent() throws Exception {
  Pipe uploadPipe = Pipe.open();

  // Directory tree to stage: two files plus one empty sub-directory.
  File stagedDirectory = tmpFolder.newFolder("folder");
  tmpFolder.newFolder("folder", "empty_directory");
  tmpFolder.newFolder("folder", "directory");
  makeFileWithContents("folder/file.txt", "This is a test!");
  makeFileWithContents("folder/directory/file.txt", "This is also a test!");

  // The lookup finds an existing object at the staging path.
  ImmutableList<StorageObjectOrIOException> lookupResult =
      ImmutableList.of(
          StorageObjectOrIOException.create(createStorageObject(STAGING_PATH, Long.MAX_VALUE)));
  when(mockGcsUtil.getObjects(anyListOf(GcsPath.class))).thenReturn(lookupResult);
  when(mockGcsUtil.create(any(GcsPath.class), anyString())).thenReturn(uploadPipe.sink());

  ImmutableList<String> classpath = ImmutableList.of(stagedDirectory.getAbsolutePath());
  defaultPackageUtil.stageClasspathElements(classpath, STAGING_PATH, createOptions);

  verify(mockGcsUtil).getObjects(anyListOf(GcsPath.class));
  verify(mockGcsUtil).create(any(GcsPath.class), anyString());
  verifyNoMoreInteractions(mockGcsUtil);
}
/**
 * A classpath element given as {@code name=path} must yield a package carrying the explicit
 * name, while its location still matches the hashed name derived from the file.
 */
@Test
public void testPackageUploadWithExplicitPackageName() throws Exception {
  Pipe uploadPipe = Pipe.open();
  File fileToStage = makeFileWithContents("file.txt", "This is a test!");
  final String overriddenName = "alias.txt";

  // The existence lookup reports that nothing is staged yet.
  ImmutableList<StorageObjectOrIOException> lookupResult =
      ImmutableList.of(StorageObjectOrIOException.create(new FileNotFoundException("some/path")));
  when(mockGcsUtil.getObjects(anyListOf(GcsPath.class))).thenReturn(lookupResult);
  when(mockGcsUtil.create(any(GcsPath.class), anyString())).thenReturn(uploadPipe.sink());

  String classpathElement = overriddenName + "=" + fileToStage.getAbsolutePath();
  List<DataflowPackage> stagedPackages =
      defaultPackageUtil.stageClasspathElements(
          ImmutableList.of(classpathElement), STAGING_PATH, createOptions);
  DataflowPackage stagedPackage = Iterables.getOnlyElement(stagedPackages);

  verify(mockGcsUtil).getObjects(anyListOf(GcsPath.class));
  verify(mockGcsUtil).create(any(GcsPath.class), anyString());
  verifyNoMoreInteractions(mockGcsUtil);

  assertThat(stagedPackage.getName(), equalTo(overriddenName));
  assertThat(
      stagedPackage.getLocation(),
      RegexMatcher.matches(STAGING_PATH + "file-" + HASH_PATTERN + ".txt"));
}
ImmutableList.of( StorageObjectOrIOException.create(new FileNotFoundException("some/path")))); when(mockGcsUtil.create(any(GcsPath.class), anyString())) .thenThrow( new IOException( } finally { verify(mockGcsUtil).getObjects(anyListOf(GcsPath.class)); verify(mockGcsUtil).create(any(GcsPath.class), anyString()); verifyNoMoreInteractions(mockGcsUtil);
/**
 * Staging a directory must upload a zip whose entries are the directory's files and its empty
 * sub-directory, with names relative to the staged directory.
 */
@Test
public void testPackageUploadWithDirectorySucceeds() throws Exception {
  Pipe uploadPipe = Pipe.open();

  // Directory tree to stage: two files plus one empty sub-directory.
  File stagedDirectory = tmpFolder.newFolder("folder");
  tmpFolder.newFolder("folder", "empty_directory");
  tmpFolder.newFolder("folder", "directory");
  makeFileWithContents("folder/file.txt", "This is a test!");
  makeFileWithContents("folder/directory/file.txt", "This is also a test!");

  // The existence lookup reports that nothing is staged yet.
  ImmutableList<StorageObjectOrIOException> lookupResult =
      ImmutableList.of(StorageObjectOrIOException.create(new FileNotFoundException("some/path")));
  when(mockGcsUtil.getObjects(anyListOf(GcsPath.class))).thenReturn(lookupResult);
  when(mockGcsUtil.create(any(GcsPath.class), anyString())).thenReturn(uploadPipe.sink());

  ImmutableList<String> classpath = ImmutableList.of(stagedDirectory.getAbsolutePath());
  defaultPackageUtil.stageClasspathElements(classpath, STAGING_PATH, createOptions);

  verify(mockGcsUtil).getObjects(anyListOf(GcsPath.class));
  verify(mockGcsUtil).create(any(GcsPath.class), anyString());
  verifyNoMoreInteractions(mockGcsUtil);

  // Read back what was written to the pipe and collect the zip entry names.
  List<String> zipEntryNames = new ArrayList<>();
  try (ZipInputStream uploadedZip =
      new ZipInputStream(Channels.newInputStream(uploadPipe.source()))) {
    ZipEntry current;
    while ((current = uploadedZip.getNextEntry()) != null) {
      zipEntryNames.add(current.getName());
    }
  }

  assertThat(
      zipEntryNames, containsInAnyOrder("directory/file.txt", "empty_directory/", "file.txt"));
}
/**
 * Staging a single file must produce one package named after the file plus a hash, located
 * under the staging path, and the uploaded bytes must be the file's contents.
 */
@Test
public void testPackageUploadWithFileSucceeds() throws Exception {
  Pipe uploadPipe = Pipe.open();
  String contents = "This is a test!";
  File fileToStage = makeFileWithContents("file.txt", contents);

  // The existence lookup reports that nothing is staged yet.
  ImmutableList<StorageObjectOrIOException> lookupResult =
      ImmutableList.of(StorageObjectOrIOException.create(new FileNotFoundException("some/path")));
  when(mockGcsUtil.getObjects(anyListOf(GcsPath.class))).thenReturn(lookupResult);
  when(mockGcsUtil.create(any(GcsPath.class), anyString())).thenReturn(uploadPipe.sink());

  List<DataflowPackage> stagedPackages =
      defaultPackageUtil.stageClasspathElements(
          ImmutableList.of(fileToStage.getAbsolutePath()), STAGING_PATH, createOptions);
  DataflowPackage stagedPackage = Iterables.getOnlyElement(stagedPackages);

  verify(mockGcsUtil).getObjects(anyListOf(GcsPath.class));
  verify(mockGcsUtil).create(any(GcsPath.class), anyString());
  verifyNoMoreInteractions(mockGcsUtil);

  assertThat(stagedPackage.getName(), RegexMatcher.matches("file-" + HASH_PATTERN + ".txt"));
  assertThat(stagedPackage.getLocation(), equalTo(STAGING_PATH + stagedPackage.getName()));

  // The first line read back from the pipe must equal what was put in the file.
  String uploadedLine =
      new LineReader(Channels.newReader(uploadPipe.source(), StandardCharsets.UTF_8.name()))
          .readLine();
  assertThat(uploadedLine, equalTo(contents));
}
/**
 * Staging an empty directory must produce one {@code .jar} package named after the directory
 * plus a hash, and the uploaded zip must contain no entries.
 */
@Test
public void testPackageUploadWithEmptyDirectorySucceeds() throws Exception {
  Pipe uploadPipe = Pipe.open();
  File emptyDirectory = tmpFolder.newFolder("folder");

  // The existence lookup reports that nothing is staged yet.
  ImmutableList<StorageObjectOrIOException> lookupResult =
      ImmutableList.of(StorageObjectOrIOException.create(new FileNotFoundException("some/path")));
  when(mockGcsUtil.getObjects(anyListOf(GcsPath.class))).thenReturn(lookupResult);
  when(mockGcsUtil.create(any(GcsPath.class), anyString())).thenReturn(uploadPipe.sink());

  List<DataflowPackage> stagedPackages =
      defaultPackageUtil.stageClasspathElements(
          ImmutableList.of(emptyDirectory.getAbsolutePath()), STAGING_PATH, createOptions);
  DataflowPackage stagedPackage = Iterables.getOnlyElement(stagedPackages);

  verify(mockGcsUtil).getObjects(anyListOf(GcsPath.class));
  verify(mockGcsUtil).create(any(GcsPath.class), anyString());
  verifyNoMoreInteractions(mockGcsUtil);

  assertThat(stagedPackage.getName(), RegexMatcher.matches("folder-" + HASH_PATTERN + ".jar"));
  assertThat(stagedPackage.getLocation(), equalTo(STAGING_PATH + stagedPackage.getName()));

  try (ZipInputStream uploadedZip =
      new ZipInputStream(Channels.newInputStream(uploadPipe.source()))) {
    assertNull(uploadedZip.getNextEntry());
  }
}
@Test public void testStagingPreservesClasspath() throws Exception { File smallFile = makeFileWithContents("small.txt", "small"); File largeFile = makeFileWithContents("large.txt", "large contents"); when(mockGcsUtil.getObjects(anyListOf(GcsPath.class))) .thenReturn( ImmutableList.of( StorageObjectOrIOException.create(new FileNotFoundException("some/path")))); when(mockGcsUtil.create(any(GcsPath.class), anyString())) .thenAnswer(invocation -> Pipe.open().sink()); List<DataflowPackage> targets = defaultPackageUtil.stageClasspathElements( ImmutableList.of(smallFile.getAbsolutePath(), largeFile.getAbsolutePath()), STAGING_PATH, createOptions); // Verify that the packages are returned small, then large, matching input order even though // the large file would be uploaded first. assertThat(targets.get(0).getName(), startsWith("small")); assertThat(targets.get(1).getName(), startsWith("large")); }
Channels.newOutputStream( new GcsUtil.GcsUtilFactory().create(options) .create(GcsPath.fromUri(shardName), BAMIO.BAM_INDEX_FILE_MIME_TYPE)); ts = new TruncatedOutputStream(
Channels.newOutputStream( new GcsUtil.GcsUtilFactory().create(options) .create(GcsPath.fromUri(shardName), BAMIO.BAM_INDEX_FILE_MIME_TYPE)); ts = new TruncatedOutputStream(
options.setGcpCredential(new TestCredential()); when(mockGcsUtil.create(any(GcsPath.class), anyString(), anyInt())) .then( invocation ->