@Override
public Iterator<List<WriteStatus>> handleInsert(String commitTime, Iterator<HoodieRecord<T>> recordItr)
    throws Exception {
  // If canIndexLogFiles, write inserts to log files, else write inserts to parquet files
  if (index.canIndexLogFiles()) {
    return new MergeOnReadLazyInsertIterable<>(recordItr, config, commitTime, this);
  } else {
    return super.handleInsert(commitTime, recordItr);
  }
}
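A hedged usage sketch (not from the original source): the returned iterator is lazy, so the writes happen and WriteStatus objects materialize only as it is consumed. `table`, `commitTime`, and `records` are assumed to be set up as in the tests below.

// Sketch: drain the lazy insert handle and collect the per-file write statuses
Iterator<List<WriteStatus>> handle = table.handleInsert(commitTime, records.iterator());
List<WriteStatus> statuses = new ArrayList<>();
handle.forEachRemaining(statuses::addAll);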
@SuppressWarnings("unchecked") @Override public Iterator<List<WriteStatus>> handleUpsertPartition(String commitTime, Integer partition, Iterator recordItr, Partitioner partitioner) { UpsertPartitioner upsertPartitioner = (UpsertPartitioner) partitioner; BucketInfo binfo = upsertPartitioner.getBucketInfo(partition); BucketType btype = binfo.bucketType; try { if (btype.equals(BucketType.INSERT)) { return handleInsert(commitTime, recordItr); } else if (btype.equals(BucketType.UPDATE)) { return handleUpdate(commitTime, binfo.fileLoc, recordItr); } else { throw new HoodieUpsertException( "Unknown bucketType " + btype + " for partition :" + partition); } } catch (Throwable t) { String msg = "Error upserting bucketType " + btype + " for partition :" + partition; logger.error(msg, t); throw new HoodieUpsertException(msg, t); } }
@SuppressWarnings("unchecked") @Override public Iterator<List<WriteStatus>> handleUpsertPartition(String commitTime, Integer partition, Iterator recordItr, Partitioner partitioner) { UpsertPartitioner upsertPartitioner = (UpsertPartitioner) partitioner; BucketInfo binfo = upsertPartitioner.getBucketInfo(partition); BucketType btype = binfo.bucketType; try { if (btype.equals(BucketType.INSERT)) { return handleInsert(commitTime, recordItr); } else if (btype.equals(BucketType.UPDATE)) { return handleUpdate(commitTime, binfo.fileLoc, recordItr); } else { throw new HoodieUpsertException( "Unknown bucketType " + btype + " for partition :" + partition); } } catch (Throwable t) { String msg = "Error upserting bucketType " + btype + " for partition :" + partition; logger.error(msg, t); throw new HoodieUpsertException(msg, t); } }
@Test
public void testInsertWithPartialFailures() throws Exception {
  HoodieWriteConfig config = makeHoodieClientConfig();
  String commitTime = HoodieTestUtils.makeNewCommitTime();
  FileSystem fs = FSUtils.getFs(basePath, jsc.hadoopConfiguration());
  HoodieTableMetaClient metadata = new HoodieTableMetaClient(jsc.hadoopConfiguration(), basePath);
  HoodieCopyOnWriteTable table = new HoodieCopyOnWriteTable(config, jsc);

  // Write a few records, and get at least one file
  // 10 records for partition 1, 1 record for partition 2.
  List<HoodieRecord> records = newHoodieRecords(10, "2016-01-31T03:16:41.415Z");
  records.addAll(newHoodieRecords(1, "2016-02-01T03:16:41.415Z"));

  // Simulate crash after first file
  List<WriteStatus> statuses = HoodieClientTestUtils
      .collectStatuses(table.handleInsert(commitTime, records.iterator()));
  WriteStatus status = statuses.get(0);
  Path partialFile = new Path(String.format("%s/%s/%s", basePath, status.getPartitionPath(),
      FSUtils.makeDataFileName(commitTime, 0, status.getFileId())));
  assertTrue(fs.exists(partialFile));

  // When we retry
  records = newHoodieRecords(10, "2016-01-31T03:16:41.415Z");
  records.addAll(newHoodieRecords(1, "2016-02-01T03:16:41.415Z"));
  statuses = HoodieClientTestUtils.collectStatuses(table.handleInsert(commitTime, records.iterator()));
  status = statuses.get(0);
  Path retriedFile = new Path(String.format("%s/%s/%s", basePath, status.getPartitionPath(),
      FSUtils.makeDataFileName(commitTime, 0, status.getFileId())));
  assertTrue(fs.exists(retriedFile));
  assertFalse(fs.exists(partialFile));
}
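The test builds the same expected-path expression twice; a hypothetical helper (not in the original test) would keep that in one place:

// Hypothetical helper: expected data file path for a write status under a commit time
private Path expectedDataFile(String commitTime, WriteStatus status) {
  return new Path(String.format("%s/%s/%s", basePath, status.getPartitionPath(),
      FSUtils.makeDataFileName(commitTime, 0, status.getFileId())));
}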
@Test
public void testFileSizeUpsertRecords() throws Exception {
  HoodieWriteConfig config = makeHoodieClientConfigBuilder().withStorageConfig(
      HoodieStorageConfig.newBuilder().limitFileSize(64 * 1024).parquetBlockSize(64 * 1024)
          .parquetPageSize(64 * 1024).build()).build();
  String commitTime = HoodieTestUtils.makeNewCommitTime();
  HoodieTableMetaClient metadata = new HoodieTableMetaClient(jsc.hadoopConfiguration(), basePath);
  HoodieCopyOnWriteTable table = new HoodieCopyOnWriteTable(config, jsc);

  List<HoodieRecord> records = new ArrayList<>();
  // Approx 1150 records are written for a block size of 64KB
  for (int i = 0; i < 2000; i++) {
    String recordStr = "{\"_row_key\":\"" + UUID.randomUUID().toString()
        + "\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":" + i + "}";
    TestRawTripPayload rowChange = new TestRawTripPayload(recordStr);
    records.add(new HoodieRecord(new HoodieKey(rowChange.getRowKey(), rowChange.getPartitionPath()),
        rowChange));
  }

  // Insert new records
  HoodieClientTestUtils.collectStatuses(table.handleInsert(commitTime, records.iterator()));

  // Check the updated file
  int counts = 0;
  for (File file : new File(basePath + "/2016/01/31").listFiles()) {
    if (file.getName().endsWith(".parquet") && FSUtils.getCommitTime(file.getName()).equals(commitTime)) {
      System.out.println(file.getName() + "-" + file.length());
      counts++;
    }
  }
  assertEquals("If the number of records is more than 1150, then there should be a new file", 3, counts);
}
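For intuition only, a conceptual sketch of the size-capped rollover this test exercises (plain Java, not Hudi's writer; the cap mirrors limitFileSize(64 * 1024)): a new file is started whenever the next record would push the current file past the cap, which is why more records than fit in one file yield multiple parquet files above.

// Conceptual sketch, not Hudi's implementation
class RollingWriter {
  private final long maxBytes;
  private long writtenBytes;
  private int fileCount;

  RollingWriter(long maxBytes) { this.maxBytes = maxBytes; }

  void write(byte[] record) {
    // Start a new file on the first write, or when the record would overflow the cap
    if (fileCount == 0 || writtenBytes + record.length > maxBytes) {
      fileCount++;
      writtenBytes = 0;
    }
    writtenBytes += record.length;
  }

  int fileCount() { return fileCount; }
}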
} else {
  // No existing base file for this file group: write the merged log records out
  // through the copy-on-write insert path
  result = hoodieCopyOnWriteTable
      .handleInsert(commitTime, operation.getPartitionPath(), operation.getFileId(),
          scanner.iterator());
    .collectStatuses(table.handleInsert(commitTime, records.iterator()));
returnedStatuses = HoodieClientTestUtils.collectStatuses(table.handleInsert(commitTime, records.iterator()));
HoodieClientTestUtils.collectStatuses(table.handleInsert(firstCommitTime, records.iterator()));
    .collectStatuses(table.handleInsert(firstCommitTime, records.iterator()));
Map<String, String> allWriteStatusMergedMetadataMap = MetadataMergeWriteStatus
    .mergeMetadataForWriteStatuses(writeStatuses);
TestRawTripPayload rowChange3 = new TestRawTripPayload(recordStr3);
records.add(new HoodieRecord(new HoodieKey(rowChange3.getRowKey(), rowChange3.getPartitionPath()),
    rowChange3));
Iterator<List<WriteStatus>> insertResult = table.handleInsert("100", records.iterator());
// Simulate a completed commit by creating its commit file on the timeline
Path commitFile = new Path(config.getBasePath() + "/.hoodie/" + HoodieTimeline.makeCommitFileName("100"));
FSUtils.getFs(basePath, HoodieTestUtils.getDefaultHadoopConf()).create(commitFile);