private HoodieWriteConfig.Builder makeHoodieClientConfigBuilder() throws Exception { // Prepare the AvroParquetIO String schemaStr = IOUtils.toString(getClass().getResourceAsStream("/exampleSchema.txt"), "UTF-8"); return HoodieWriteConfig.newBuilder().withPath(basePath).withSchema(schemaStr); }
private HoodieWriteConfig makeHoodieClientConfig(String schema) throws Exception { // Prepare the AvroParquetIO String schemaStr = IOUtils.toString(getClass().getResourceAsStream(schema), "UTF-8"); return HoodieWriteConfig.newBuilder().withPath(basePath).withSchema(schemaStr).build(); }
@SuppressWarnings("unchecked") public static JavaRDD<HoodieRecord> dropDuplicates(JavaSparkContext jssc, JavaRDD<HoodieRecord> incomingHoodieRecords, Map<String, String> parameters) throws Exception { HoodieWriteConfig writeConfig = HoodieWriteConfig .newBuilder() .withPath(parameters.get("path")) .withProps(parameters).build(); return dropDuplicates(jssc, incomingHoodieRecords, writeConfig); } }
public static HoodieWriteClient createHoodieClient(JavaSparkContext jssc, String schemaStr, String basePath, String tblName, Map<String, String> parameters) throws Exception { HoodieWriteConfig writeConfig = HoodieWriteConfig.newBuilder().combineInput(true, true) .withPath(basePath).withAutoCommit(false) .withSchema(schemaStr).forTable(tblName).withIndexConfig( HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.BLOOM).build()) .withCompactionConfig(HoodieCompactionConfig.newBuilder() .withPayloadClass(parameters.get( DataSourceWriteOptions .PAYLOAD_CLASS_OPT_KEY())) .build()) // override above with Hoodie configs specified as options. .withProps(parameters).build(); return new HoodieWriteClient<>(jssc, writeConfig); }
@Test public void testTagLocationWithEmptyRDD() throws Exception { // We have some records to be tagged (two different partitions) JavaRDD<HoodieRecord> recordRDD = jsc.emptyRDD(); // Also create the metadata and config HoodieTableMetaClient metadata = new HoodieTableMetaClient(jsc.hadoopConfiguration(), basePath); HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath).build(); HoodieTable table = HoodieTable.getHoodieTable(metadata, config, jsc); // Let's tag HoodieBloomIndex bloomIndex = new HoodieBloomIndex(config); try { bloomIndex.tagLocation(recordRDD, jsc, table); } catch (IllegalArgumentException e) { fail("EmptyRDD should not result in IllegalArgumentException: Positive number of slices " + "required"); } }
public static HoodieWriteClient createHoodieClient(JavaSparkContext jssc, String schemaStr, String basePath, String tblName, Map<String, String> parameters) throws Exception { HoodieWriteConfig writeConfig = HoodieWriteConfig.newBuilder().combineInput(true, true) .withPath(basePath).withAutoCommit(false) .withSchema(schemaStr).forTable(tblName).withIndexConfig( HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.BLOOM).build()) .withCompactionConfig(HoodieCompactionConfig.newBuilder() .withPayloadClass(parameters.get( DataSourceWriteOptions .PAYLOAD_CLASS_OPT_KEY())) .build()) // override above with Hoodie configs specified as options. .withProps(parameters).build(); return new HoodieWriteClient<>(jssc, writeConfig); }
private HoodieWriteConfig.Builder getConfigBuilder() { return HoodieWriteConfig.newBuilder().withPath(basePath).withSchema(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA) .withParallelism(1, 1).withCompactionConfig( HoodieCompactionConfig.newBuilder().compactionSmallFileSize(1024 * 1024).withInlineCompaction(false) .build()).withAutoCommit(false) .withStorageConfig(HoodieStorageConfig.newBuilder().limitFileSize(1024 * 1024).build()) .forTable("test-trip-table").withIndexConfig( HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.HBASE) .withHBaseIndexConfig( new HoodieHBaseIndexConfig.Builder() .hbaseZkPort(Integer.valueOf(hbaseConfig.get("hbase.zookeeper.property.clientPort"))) .hbaseZkQuorum(hbaseConfig.get("hbase.zookeeper.quorum")).hbaseTableName(tableName) .hbaseIndexGetBatchSize(100).build()) .build()); } }
/** * Get Config builder with default configs set * * @return Config Builder */ HoodieWriteConfig.Builder getConfigBuilder() { return HoodieWriteConfig.newBuilder().withPath(basePath).withSchema(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA) .withParallelism(2, 2) .withBulkInsertParallelism(2).withFinalizeWriteParallelism(2) .withConsistencyCheckEnabled(true) .withCompactionConfig(HoodieCompactionConfig.newBuilder().compactionSmallFileSize(1024 * 1024).build()) .withStorageConfig(HoodieStorageConfig.newBuilder().limitFileSize(1024 * 1024).build()) .forTable("test-trip-table") .withIndexConfig(HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.BLOOM).build()); }
@Test public void testUnBounded() { Map<Long, List<Long>> sizesMap = Maps.newHashMap(); sizesMap.put(120 * MB, Lists.newArrayList(60 * MB, 10 * MB, 80 * MB)); sizesMap.put(110 * MB, Lists.newArrayList()); sizesMap.put(100 * MB, Lists.newArrayList(MB)); sizesMap.put(90 * MB, Lists.newArrayList(1024 * MB)); UnBoundedCompactionStrategy strategy = new UnBoundedCompactionStrategy(); HoodieWriteConfig writeConfig = HoodieWriteConfig.newBuilder().withPath("/tmp").withCompactionConfig( HoodieCompactionConfig.newBuilder().withCompactionStrategy(strategy).build()).build(); List<HoodieCompactionOperation> operations = createCompactionOperations(writeConfig, sizesMap); List<HoodieCompactionOperation> returned = strategy.orderAndFilter(writeConfig, operations, new ArrayList<>()); assertEquals("UnBounded should not re-order or filter", operations, returned); }
/** * @param basePath path to Hoodie dataset */ public HoodieReadClient(JavaSparkContext jsc, String basePath) { this(jsc, HoodieWriteConfig.newBuilder().withPath(basePath) // by default we use HoodieBloomIndex .withIndexConfig( HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.BLOOM).build()) .build()); }
private HoodieWriteConfig getHoodieClientConfig() throws Exception { return HoodieWriteConfig.newBuilder().combineInput(true, true).withPath(cfg.targetBasePath) .withAutoCommit(false) .withSchema(schemaProvider.getTargetSchema().toString()) .withCompactionConfig(HoodieCompactionConfig.newBuilder().withPayloadClass(cfg.payloadClassName).build()) .forTable(cfg.targetTableName) .withIndexConfig(HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.BLOOM).build()) .withProps(props).build(); }
@Test public void testArchiveEmptyDataset() throws IOException { HoodieWriteConfig cfg = HoodieWriteConfig.newBuilder().withPath(basePath) .withSchema(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA).withParallelism(2, 2) .forTable("test-trip-table").build(); HoodieCommitArchiveLog archiveLog = new HoodieCommitArchiveLog(cfg, new HoodieTableMetaClient(dfs.getConf(), cfg.getBasePath(), true)); boolean result = archiveLog.archiveIfRequired(jsc); assertTrue(result); }
private static HoodieWriteClient createHoodieClient(JavaSparkContext jsc, String basePath) throws Exception { HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath).withIndexConfig( HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.BLOOM).build()).build(); return new HoodieWriteClient(jsc, config, false); }
private static HoodieWriteClient createHoodieClient(JavaSparkContext jsc, String basePath) throws Exception { HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath).withIndexConfig( HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.BLOOM).build()).build(); return new HoodieWriteClient(jsc, config); } }
private HoodieWriteConfig.Builder getConfigBuilder() { return HoodieWriteConfig.newBuilder().withPath(basePath).withSchema(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA) .withParallelism(2, 2).withCompactionConfig( HoodieCompactionConfig.newBuilder().compactionSmallFileSize(1024 * 1024).withInlineCompaction(false) .build()).withStorageConfig(HoodieStorageConfig.newBuilder().limitFileSize(1024 * 1024).build()) .withMemoryConfig(HoodieMemoryConfig.newBuilder().withMaxDFSStreamBufferSize(1 * 1024 * 1024).build()) .forTable("test-trip-table") .withIndexConfig(HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.BLOOM).build()); }
private HoodieWriteConfig.Builder getConfigBuilder(Boolean autoCommit) { return HoodieWriteConfig.newBuilder().withPath(basePath).withSchema(TRIP_EXAMPLE_SCHEMA).withParallelism(2, 2) .withAutoCommit(autoCommit).withAssumeDatePartitioning(true).withCompactionConfig( HoodieCompactionConfig.newBuilder().compactionSmallFileSize(1024 * 1024 * 1024).withInlineCompaction(false) .withMaxNumDeltaCommitsBeforeCompaction(1).build()) .withStorageConfig(HoodieStorageConfig.newBuilder().limitFileSize(1024 * 1024 * 1024).build()) .forTable("test-trip-table") .withIndexConfig(HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.BLOOM).build()); }
private HoodieWriteConfig getHoodieClientConfig() throws Exception { return HoodieWriteConfig.newBuilder().combineInput(true, true).withPath(cfg.targetBasePath) .withAutoCommit(false) .withSchema(schemaProvider.getTargetSchema().toString()) .withCompactionConfig(HoodieCompactionConfig.newBuilder().withPayloadClass(cfg.payloadClassName).build()) .forTable(cfg.targetTableName) .withIndexConfig(HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.BLOOM).build()) .withProps(props).build(); }
private HoodieWriteConfig.Builder getConfigBuilder(Boolean autoCommit, HoodieIndex.IndexType indexType) { return HoodieWriteConfig.newBuilder().withPath(basePath).withSchema(TRIP_EXAMPLE_SCHEMA).withParallelism(2, 2) .withAutoCommit(autoCommit).withAssumeDatePartitioning(true).withCompactionConfig( HoodieCompactionConfig.newBuilder().compactionSmallFileSize(1024 * 1024 * 1024).withInlineCompaction(false) .withMaxNumDeltaCommitsBeforeCompaction(1).build()) .withStorageConfig(HoodieStorageConfig.newBuilder().limitFileSize(1024 * 1024 * 1024).build()) .forTable("test-trip-table") .withIndexConfig(HoodieIndexConfig.newBuilder().withIndexType(indexType).build()); }
HoodieWriteConfig.Builder getConfigBuilder() { return HoodieWriteConfig.newBuilder().withPath(basePath).withSchema(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA) .withParallelism(2, 2) .withCompactionConfig(HoodieCompactionConfig.newBuilder().compactionSmallFileSize(1024 * 1024).build()) .withStorageConfig(HoodieStorageConfig.newBuilder().limitFileSize(1024 * 1024).build()) .forTable("test-trip-table") .withIndexConfig(HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.BLOOM).build()) .withBulkInsertParallelism(2); } }
/** * @param basePath path to Hoodie dataset */ public HoodieReadClient(JavaSparkContext jsc, String basePath) { this(jsc, HoodieWriteConfig.newBuilder().withPath(basePath) // by default we use HoodieBloomIndex .withIndexConfig( HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.BLOOM).build()) .build()); }