public HoodieInsertValueGenResult(T record, Schema schema) {
    this.record = record;
    try {
      this.insertValue = record.getData().getInsertValue(schema);
    } catch (Exception e) {
      this.exception = Optional.of(e);
    }
  }
}
private Optional<IndexedRecord> getIndexedRecord(HoodieRecord<T> hoodieRecord) {
  Optional recordMetadata = hoodieRecord.getData().getMetadata();
  try {
    Optional<IndexedRecord> avroRecord = hoodieRecord.getData().getInsertValue(schema);
public static <T extends HoodieRecordPayload> JavaRDD<HoodieRecord<T>> combineRecords(
      final JavaRDD<HoodieRecord<T>> records, final Function<HoodieRecord<T>, Object> recordKeyFunc,
      final int parallelism) {
    return records
        .mapToPair(record -> new Tuple2<>(recordKeyFunc.call(record), record))
        .reduceByKey((rec1, rec2) -> {
          @SuppressWarnings("unchecked")
          T reducedData = (T) rec1.getData().preCombine(rec2.getData());
          return new HoodieRecord<T>(rec1.getKey(), reducedData);
        }, parallelism)
        .map(recordTuple -> recordTuple._2());
  }
}
/**
 * Perform the actual writing of the given record into the backing file.
 */
public void write(HoodieRecord record, Optional<IndexedRecord> avroRecord,
    Optional<Exception> exception) {
  Optional recordMetadata = record.getData().getMetadata();
  if (exception.isPresent() && exception.get() instanceof Throwable) {
    // Not throwing exception from here, since we don't want to fail the entire job for a single record
    writeStatus.markFailure(record, exception.get(), recordMetadata);
    logger.error("Error writing record " + record, exception.get());
  } else {
    write(record, avroRecord);
  }
}
try {
  Optional<IndexedRecord> combinedAvroRecord = hoodieRecord.getData()
      .combineAndGetUpdateValue(oldRecord, schema);
  if (writeUpdateRecord(hoodieRecord, combinedAvroRecord)) {
/**
 * Deduplicate Hoodie records, using the payload's preCombine to merge duplicates for the same key.
 */
JavaRDD<HoodieRecord<T>> deduplicateRecords(JavaRDD<HoodieRecord<T>> records, int parallelism) {
  boolean isIndexingGlobal = index.isGlobal();
  return records
      .mapToPair(record -> {
        HoodieKey hoodieKey = record.getKey();
        // If the index is global, records are expected to differ in their partitionPath,
        // so deduplicate on the record key alone.
        Object key = isIndexingGlobal ? hoodieKey.getRecordKey() : hoodieKey;
        return new Tuple2<>(key, record);
      })
      .reduceByKey((rec1, rec2) -> {
        @SuppressWarnings("unchecked")
        T reducedData = (T) rec1.getData().preCombine(rec2.getData());
        // We cannot allow the user to change the key or partitionPath, since that would affect
        // everything downstream; so pick them from one of the records.
        return new HoodieRecord<T>(rec1.getKey(), reducedData);
      }, parallelism)
      .map(Tuple2::_2);
}
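// --- A minimal sketch (not Hudi's code) of the HoodieRecordPayload contract that the dedup
// and merge paths above rely on: preCombine keeps the payload with the larger ordering
// timestamp, combineAndGetUpdateValue overwrites with the latest, and an absent insert value
// signals a delete. The class and field names here are illustrative assumptions;
// getMetadata() is assumed to have a default implementation returning an empty Optional,
// which the call sites above tolerate.
import java.io.IOException;
import java.util.Optional;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.generic.IndexedRecord;

public class LatestTsPayload implements HoodieRecordPayload<LatestTsPayload> {

  private final GenericRecord record; // null models a delete
  private final long orderingTs;

  public LatestTsPayload(GenericRecord record, long orderingTs) {
    this.record = record;
    this.orderingTs = orderingTs;
  }

  @Override
  public LatestTsPayload preCombine(LatestTsPayload another) {
    // Keep whichever payload carries the larger ordering timestamp.
    return another.orderingTs > this.orderingTs ? another : this;
  }

  @Override
  public Optional<IndexedRecord> combineAndGetUpdateValue(IndexedRecord currentValue, Schema schema)
      throws IOException {
    // Overwrite-with-latest semantics: ignore the value currently on storage.
    return getInsertValue(schema);
  }

  @Override
  public Optional<IndexedRecord> getInsertValue(Schema schema) throws IOException {
    // An empty Optional tells the write handles above to count this record as a delete.
    return Optional.ofNullable(record);
  }
}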
private boolean writeRecord(HoodieRecord<T> hoodieRecord, Optional<IndexedRecord> indexedRecord) {
  Optional recordMetadata = hoodieRecord.getData().getMetadata();
  try {
    if (indexedRecord.isPresent()) {
      storageWriter.writeAvroWithMetadata(indexedRecord.get(), hoodieRecord);
      recordsWritten++;
    } else {
      recordsDeleted++;
    }
    writeStatus.markSuccess(hoodieRecord, recordMetadata);
    // Deflate the record payload after recording success. This helps users access the payload
    // as part of marking the record successful.
    hoodieRecord.deflate();
    return true;
  } catch (Exception e) {
    logger.error("Error writing record " + hoodieRecord, e);
    writeStatus.markFailure(hoodieRecord, e, recordMetadata);
  }
  return false;
}
/**
 * Transformer function to help transform a HoodieRecord. This transformer is used by
 * BufferedIterator to offload expensive transformation work to the reader thread.
 */
static <T extends HoodieRecordPayload> Function<HoodieRecord<T>, Tuple2<HoodieRecord<T>, Optional<IndexedRecord>>>
    getTransformFunction(Schema schema) {
  return hoodieRecord -> {
    try {
      return new Tuple2<HoodieRecord<T>, Optional<IndexedRecord>>(hoodieRecord,
          hoodieRecord.getData().getInsertValue(schema));
    } catch (IOException e) {
      throw new HoodieException(e);
    }
  };
}
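// Hypothetical usage of the transform function above, assuming it is a
// java.util.function.Function (consistent with wrapping the IOException in an unchecked
// HoodieException) and reusing the illustrative LatestTsPayload sketch; `record` and
// `schema` are assumed to be in scope:
Function<HoodieRecord<LatestTsPayload>, Tuple2<HoodieRecord<LatestTsPayload>, Optional<IndexedRecord>>> transform =
    getTransformFunction(schema);
Tuple2<HoodieRecord<LatestTsPayload>, Optional<IndexedRecord>> pair = transform.apply(record);
if (!pair._2().isPresent()) {
  // The payload resolved to a delete; the downstream write handles count it as one.
}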
/**
 * Writes all records passed.
 */
public void write() {
  try {
    while (recordIterator.hasNext()) {
      HoodieRecord<T> record = recordIterator.next();
      write(record, record.getData().getInsertValue(schema));
    }
  } catch (IOException io) {
    throw new HoodieInsertException("Failed to insert records for path " + getStorageWriterPath(), io);
  }
}
@Override
protected void processNextRecord(HoodieRecord<? extends HoodieRecordPayload> hoodieRecord) throws IOException {
  String key = hoodieRecord.getRecordKey();
  if (records.containsKey(key)) {
    // Merge and store the merged record. The HoodieRecordPayload implementation is free to decide
    // what should be done when a delete (empty payload) is encountered before or after an insert/update.
    HoodieRecordPayload combinedValue = records.get(key).getData().preCombine(hoodieRecord.getData());
    records.put(key, new HoodieRecord<>(new HoodieKey(key, hoodieRecord.getPartitionPath()), combinedValue));
  } else {
    // Put the record as is
    records.put(key, hoodieRecord);
  }
}
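// Illustrative only, reusing the hypothetical LatestTsPayload from the sketch above: when a
// delete (empty payload) with a newer ordering timestamp meets an earlier insert for the same
// key, preCombine lets the delete win, and the merged record later resolves to an empty
// insert value at write time.
static void mergeExample(GenericRecord someRecord, Schema schema) throws IOException {
  LatestTsPayload insert = new LatestTsPayload(someRecord, 1L);
  LatestTsPayload delete = new LatestTsPayload(null, 2L); // null body models a delete
  LatestTsPayload merged = insert.preCombine(delete);     // newer timestamp wins
  assert !merged.getInsertValue(schema).isPresent();      // counted as a delete downstream
}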
@Override
public void write(HoodieRecord record, Optional<IndexedRecord> insertValue) {
  Optional recordMetadata = record.getData().getMetadata();
  try {
    init(record);
    flushToDiskIfRequired(record);
    writeToBuffer(record);
  } catch (Throwable t) {
    // Not throwing exception from here, since we don't want to fail the entire job for a single record
    writeStatus.markFailure(record, t, recordMetadata);
    logger.error("Error writing record " + record, t);
  }
}
    HoodieMemoryConfig.DEFAULT_SPILLABLE_MAP_BASE_PATH);
for (HoodieRecord<? extends HoodieRecordPayload> hoodieRecord : scanner) {
  Optional<IndexedRecord> record = hoodieRecord.getData().getInsertValue(readerSchema);
  if (allRecords.size() >= limit) {
    break;
/**
 * Perform the actual writing of the given record into the backing file.
 */
public void write(HoodieRecord record, Optional<IndexedRecord> avroRecord) {
  Optional recordMetadata = record.getData().getMetadata();
  try {
    if (avroRecord.isPresent()) {
      storageWriter.writeAvroWithMetadata(avroRecord.get(), record);
      // Update the new location of the record, so we know where to find it next
      record.setNewLocation(new HoodieRecordLocation(commitTime, writeStatus.getFileId()));
      recordsWritten++;
      insertRecordsWritten++;
    } else {
      recordsDeleted++;
    }
    writeStatus.markSuccess(record, recordMetadata);
    // Deflate the record payload after recording success. This helps users access the payload
    // as part of marking the record successful.
    record.deflate();
  } catch (Throwable t) {
    // Not throwing exception from here, since we don't want to fail the entire job for a single record
    writeStatus.markFailure(record, t, recordMetadata);
    logger.error("Error writing record " + record, t);
  }
}
Optional<IndexedRecord> recordOptional = hoodieRecord.getData().getInsertValue(getReaderSchema());
ArrayWritable aWritable;
String key = hoodieRecord.getRecordKey();