@Override
protected DataWriter<FileAwareInputStream> buildWriter() throws IOException {
  return new TarArchiveInputStreamDataWriter(this.destination.getProperties(), this.branches, this.branch);
}
@Override
public DataWriter<GenericRecord> build() throws IOException {
  Preconditions.checkNotNull(this.destination);
  Preconditions.checkArgument(!Strings.isNullOrEmpty(this.writerId));
  Preconditions.checkNotNull(this.schema);
  Preconditions.checkArgument(this.format == WriterOutputFormat.AVRO);

  switch (this.destination.getType()) {
    case HDFS:
      return new AvroHdfsDataWriter(this, this.destination.getProperties());
    default:
      throw new RuntimeException("Unknown destination type: " + this.destination.getType());
  }
}
}
@Test
public void testMethods() {
  State state = new State();
  state.setProp("foo", "bar");

  Destination destination = Destination.of(Destination.DestinationType.HDFS, state);
  Assert.assertEquals(destination.getType(), Destination.DestinationType.HDFS);
  Assert.assertEquals(destination.getProperties().getPropertyNames().size(), 1);
  Assert.assertEquals(destination.getProperties().getProp("foo"), "bar");
}
}
private JdbcWriterCommands createJdbcWriterCommands(Connection conn) {
  String destKey = ForkOperatorUtils.getPropertyNameForBranch(ConfigurationKeys.WRITER_DESTINATION_TYPE_KEY,
      this.branches, this.branchId);
  String destType = Preconditions.checkNotNull(this.state.getProp(destKey),
      destKey + " is required for underlying JDBC product name");
  Destination dest = Destination.of(DestinationType.valueOf(destType.toUpperCase()), this.state);
  return this.jdbcWriterCommandsFactory.newInstance(dest, conn);
}
/**
 * Create a new {@link Destination} instance.
 *
 * @param type destination type
 * @param properties destination properties
 * @return newly created {@link Destination} instance
 */
public static Destination of(DestinationType type, State properties) {
  return new Destination(type, properties);
}
}
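// Usage sketch (not from the source files): package a writer's State into a Destination before
// handing it to a DataWriterBuilder or to the JdbcWriterCommandsFactory below; the paths are illustrative.
State props = new State();
props.setProp(ConfigurationKeys.WRITER_STAGING_DIR, "/tmp/gobblin/task-staging");
props.setProp(ConfigurationKeys.WRITER_OUTPUT_DIR, "/tmp/gobblin/task-output");
Destination dest = Destination.of(Destination.DestinationType.HDFS, props);
// The writer later reads the same State back via dest.getProperties().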
private ParquetDataWriterBuilder getParquetDataWriterBuilder() {
  ParquetDataWriterBuilder writerBuilder = new ParquetDataWriterBuilder();
  writerBuilder.destination = Destination.of(Destination.DestinationType.HDFS, properties);
  writerBuilder.writerId = TestConstants.TEST_WRITER_ID;
  writerBuilder.schema = this.schema;
  writerBuilder.format = WriterOutputFormat.PARQUET;
  return writerBuilder;
}
protected DataWriter<FileAwareInputStream> buildWriter() throws IOException {
  return new FileAwareInputStreamDataWriter(this.destination.getProperties(), this.branches, this.branch,
      this.writerAttemptId);
}
@Override
public DataWriter<Group> build() throws IOException {
  Preconditions.checkNotNull(this.destination);
  Preconditions.checkArgument(!Strings.isNullOrEmpty(this.writerId));
  Preconditions.checkNotNull(this.schema);
  Preconditions.checkArgument(this.format == WriterOutputFormat.PARQUET);

  switch (this.destination.getType()) {
    case HDFS:
      return new ParquetHdfsDataWriter(this, this.destination.getProperties());
    default:
      throw new RuntimeException("Unknown destination type: " + this.destination.getType());
  }
}
/**
 * @param state state providing the branch-aware {@code ConfigurationKeys.WRITER_DESTINATION_TYPE_KEY} property
 * @param conn connection to the JDBC destination
 * @return {@link JdbcWriterCommands} based on {@code ConfigurationKeys.WRITER_DESTINATION_TYPE_KEY}
 */
public JdbcWriterCommands newInstance(State state, Connection conn) {
  String destKey = ForkOperatorUtils.getPropertyNameForBranch(ConfigurationKeys.WRITER_DESTINATION_TYPE_KEY,
      state.getPropAsInt(ConfigurationKeys.FORK_BRANCHES_KEY, 1),
      state.getPropAsInt(ConfigurationKeys.FORK_BRANCH_ID_KEY, 0));
  String destType = state.getProp(destKey);
  Preconditions.checkNotNull(destType, destKey + " is required for underlying JDBC product name");
  return newInstance(Destination.of(DestinationType.valueOf(destType.toUpperCase()), state), conn);
}
}
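// Usage sketch (not from the source files): resolve JdbcWriterCommands directly from job State.
// Assumptions: a single branch so the destination-type key is read unsuffixed, a factory class named
// JdbcWriterCommandsFactory (as the field in createJdbcWriterCommands above suggests) with a no-arg
// constructor, and conn being an already-open java.sql.Connection.
State state = new State();
state.setProp(ConfigurationKeys.WRITER_DESTINATION_TYPE_KEY, "mysql"); // upper-cased before DestinationType.valueOf
JdbcWriterCommands commands = new JdbcWriterCommandsFactory().newInstance(state, conn);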
@Override
public DataWriter<QueryBasedHiveConversionEntity> build() throws IOException {
  try {
    return new HiveQueryExecutionWriter(
        HiveJdbcConnector.newConnectorWithProps(this.destination.getProperties().getProperties()),
        this.destination.getProperties());
  } catch (SQLException e) {
    throw new RuntimeException(e);
  }
}
}
/**
 * @param destination destination carrying the JDBC product type and its properties
 * @param conn connection to the JDBC destination
 * @return {@link JdbcWriterCommands} based on the destination type
 */
public JdbcWriterCommands newInstance(Destination destination, Connection conn) {
  switch (destination.getType()) {
    case MYSQL:
      return new MySqlWriterCommands(destination.getProperties(), conn);
    case TERADATA:
      return new TeradataWriterCommands(destination.getProperties(), conn);
    case POSTGRES:
      return new PostgresWriterCommands(destination.getProperties(), conn);
    default:
      throw new IllegalArgumentException(destination.getType() + " is not supported");
  }
}
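// Usage sketch (not from the source files): the Destination-based overload dispatches purely on the
// destination type, so a MYSQL destination yields MySqlWriterCommands bound to the given connection.
// Assumes the same presumed factory class name, a no-arg constructor, and an open Connection named conn.
Destination mysqlDest = Destination.of(DestinationType.MYSQL, state);
JdbcWriterCommands mysqlCommands = new JdbcWriterCommandsFactory().newInstance(mysqlDest, conn);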
/**
 * Build a {@link org.apache.gobblin.writer.DataWriter} for writing fetched data records.
 */
private DataWriter<Object> buildWriter() throws IOException {
  DataWriterBuilder<Object, Object> builder = this.taskContext.getDataWriterBuilder(this.branches, this.index)
      .writeTo(Destination.of(this.taskContext.getDestinationType(this.branches, this.index), this.taskState))
      .writeInFormat(this.taskContext.getWriterOutputFormat(this.branches, this.index))
      .withWriterId(this.taskId)
      .withSchema(this.convertedSchema.orNull())
      .withBranches(this.branches)
      .forBranch(this.index);
  if (this.taskAttemptId.isPresent()) {
    builder.withAttemptId(this.taskAttemptId.get());
  }

  DataWriter<Object> writer = new PartitionedDataWriter<>(builder, this.taskContext.getTaskState());
  logger.info("Wrapping writer " + writer);
  return new DataWriterWrapperBuilder<>(writer, this.taskState).build();
}
@Override
public final DataWriter<FileAwareInputStream> build() throws IOException {
  setJobSpecificOutputPaths(this.destination.getProperties());
  // Each writer/mapper gets its own task-staging directory
  this.destination.getProperties().setProp(ConfigurationKeys.WRITER_FILE_PATH, this.writerId);
  return buildWriter();
}
/**
 * @param destination destination carrying the JDBC product type and its properties
 * @param conn connection to the JDBC destination
 * @return {@link JdbcWriterCommands} based on the destination type
 */
public JdbcWriterCommands newInstance(Destination destination, Connection conn) {
  switch (destination.getType()) {
    case MYSQL:
      return new MySqlWriterCommands(destination.getProperties(), conn);
    case TERADATA:
      return new TeradataWriterCommands(destination.getProperties(), conn);
    default:
      throw new IllegalArgumentException(destination.getType() + " is not supported");
  }
}
.withWriterId("0").writeTo(Destination.of(Destination.DestinationType.HDFS, sourceState)) .writeInFormat(WriterOutputFormat.ORC).build();
/**
 * Build a {@link org.apache.gobblin.writer.DataWriter}.
 *
 * @return the built {@link org.apache.gobblin.writer.DataWriter}
 * @throws java.io.IOException if there is anything wrong building the writer
 */
@Override
public DataWriter<Object> build() throws IOException {
  return new MetadataWriterWrapper<byte[]>(new SimpleDataWriter(this, this.destination.getProperties()),
      byte[].class, this.branches, this.branch, this.destination.getProperties());
}
@BeforeClass
public void setUp() throws Exception {
  // Making the staging and/or output dirs if necessary
  File stagingDir = new File(TestConstants.TEST_STAGING_DIR);
  File outputDir = new File(TestConstants.TEST_OUTPUT_DIR);
  if (!stagingDir.exists()) {
    stagingDir.mkdirs();
  }
  if (!outputDir.exists()) {
    outputDir.mkdirs();
  }

  this.schema = new Schema.Parser().parse(TestConstants.AVRO_SCHEMA);

  this.filePath = TestConstants.TEST_EXTRACT_NAMESPACE.replaceAll("\\.", "/") + "/"
      + TestConstants.TEST_EXTRACT_TABLE + "/" + TestConstants.TEST_EXTRACT_ID + "_"
      + TestConstants.TEST_EXTRACT_PULL_TYPE;

  properties = new State();
  properties.setProp(ConfigurationKeys.WRITER_BUFFER_SIZE, ConfigurationKeys.DEFAULT_BUFFER_SIZE);
  properties.setProp(ConfigurationKeys.WRITER_FILE_SYSTEM_URI, TestConstants.TEST_FS_URI);
  properties.setProp(ConfigurationKeys.WRITER_STAGING_DIR, TestConstants.TEST_STAGING_DIR);
  properties.setProp(ConfigurationKeys.WRITER_OUTPUT_DIR, TestConstants.TEST_OUTPUT_DIR);
  properties.setProp(ConfigurationKeys.WRITER_FILE_PATH, this.filePath);
  properties.setProp(ConfigurationKeys.WRITER_FILE_NAME, TestConstants.TEST_FILE_NAME);

  // Build a writer to write test records
  this.writer = new AvroDataWriterBuilder()
      .writeTo(Destination.of(Destination.DestinationType.HDFS, properties))
      .writeInFormat(WriterOutputFormat.AVRO)
      .withWriterId(TestConstants.TEST_WRITER_ID)
      .withSchema(this.schema)
      .withBranches(1)
      .forBranch(0)
      .build();
}
/**
 * For backward compatibility with how Fork creates writers, invoke {@code fromState} whenever
 * {@code writeTo} is called.
 *
 * @param destination destination to write to
 * @return this
 */
@Override
public AsyncHttpWriterBuilder<D, RQ, RP> writeTo(Destination destination) {
  super.writeTo(destination);
  return fromState(destination.getProperties());
}