/**
 * Creates a checkpoint positioned at the given avro block start and the
 * record offset within that block; the string form is precomputed eagerly.
 */
public AvroFileCheckpoint(long blockStart, long recordOffset) {
  this.checkpointStr = generateCheckpointStr(blockStart, recordOffset);
  this.blockStart = blockStart;
  this.recordOffset = recordOffset;
}
/**
 * Compares two string-encoded checkpoints by parsing each into an
 * {@link AvroFileCheckpoint} and delegating to its natural ordering.
 */
public static int offsetComparator(String offset1, String offset2) {
  return new AvroFileCheckpoint(offset1).compareTo(new AvroFileCheckpoint(offset2));
}
/**
 * Reconnects to the file systems in case of failure, resetting the offset to
 * the last checkpoint (the last successfully read message).
 * Throws {@link org.apache.samza.SamzaException} if the max number of retries
 * is reached.
 */
public void reconnect() {
  reconnect(getCurOffset());
}
/**
 * Reports whether any partition file still has unread records, transparently
 * advancing to the next file whenever the current one is exhausted.
 */
public boolean hasNext() {
  while (curFileIndex < filePaths.size()) {
    if (curReader.hasNext()) {
      return true;
    }
    // Current file is drained: release it and move on to the next, if any.
    curReader.close();
    curFileIndex++;
    boolean moreFilesRemain = curFileIndex < filePaths.size();
    if (moreFilesRemain) {
      curReader = HdfsReaderFactory.getHdfsReader(readerType, systemStreamPartition);
      curReader.open(filePaths.get(curFileIndex), "0");
    }
  }
  return false;
}
/**
 * Positions this reader at the given multi-file offset: tears down any
 * existing single-file reader, resolves the file index and in-file offset
 * from the encoded offset string, and opens a fresh reader there.
 */
private void init(String offset) {
  // Dispose of a reader left over from a previous position, if present.
  if (curReader != null) {
    curReader.close();
    curReader = null;
  }
  curFileIndex = getCurFileIndex(offset);
  int numFiles = filePaths.size();
  if (curFileIndex >= numFiles) {
    throw new SamzaException(
        String.format("Invalid file index %d. Number of files is %d", curFileIndex, numFiles));
  }
  curSingleFileOffset = getCurSingleFileOffset(offset);
  curReader = HdfsReaderFactory.getHdfsReader(readerType, systemStreamPartition);
  curReader.open(filePaths.get(curFileIndex), curSingleFileOffset);
}
public IncomingMessageEnvelope readNext() { if (!hasNext()) { LOG.warn("Attempting to read more data when there aren't any. ssp=" + systemStreamPartition); return null; } // record the next offset before we read, so when the read fails and we reconnect, // we seek to the same offset that we try below curSingleFileOffset = curReader.nextOffset(); IncomingMessageEnvelope messageEnvelope = curReader.readNext(); // Copy everything except for the offset. Turn the single-file style offset into a multi-file one return new IncomingMessageEnvelope(messageEnvelope.getSystemStreamPartition(), getCurOffset(), messageEnvelope.getKey(), messageEnvelope.getMessage(), messageEnvelope.getSize(), messageEnvelope.getEventTime(), messageEnvelope.getArrivalTime()); }
/**
 * Drains the given reader until it is exhausted or shutdown is requested,
 * offering each message to the consumer queue and updating metrics, then
 * emits an end-of-stream envelope and closes the reader.
 */
private void doPoll(MultiFileHdfsReader reader) {
  SystemStreamPartition ssp = reader.getSystemStreamPartition();
  while (reader.hasNext() && !isShutdown) {
    IncomingMessageEnvelope envelope = reader.readNext();
    offerMessage(ssp, envelope);
    consumerMetrics.incNumEvents(ssp);
    consumerMetrics.incTotalNumEvents();
  }
  offerMessage(ssp, IncomingMessageEnvelope.buildEndOfStreamEnvelope(ssp));
  reader.close();
}
/**
 * Shuts this reader down, logging the current multi-file offset for
 * diagnostics and closing the underlying single-file reader if one is open.
 */
public void close() {
  // Fixed log-message typo: "MiltiFileHdfsReader" -> "MultiFileHdfsReader".
  LOG.info(String.format("MultiFileHdfsReader shutdown requested for %s. Current offset = %s",
      systemStreamPartition, getCurOffset()));
  if (curReader != null) {
    curReader.close();
  }
}
/**
 * Compares two string offsets using the semantics of the given reader type.
 * Kept as a switch so new reader types slot in as additional cases; a null
 * readerType fails with an NPE from the switch itself.
 *
 * @throws SamzaException if the reader type has no comparator
 */
public static int offsetComparator(ReaderType readerType, String offset1, String offset2) {
  switch (readerType) {
    case AVRO:
      return AvroFileHdfsReader.offsetComparator(offset1, offset2);
    default:
      throw new SamzaException("Unsupported reader type: " + readerType);
  }
}
/**
 * Factory method: constructs the single-file reader implementation matching
 * the given reader type, bound to the given system stream partition.
 *
 * @throws SamzaException if no reader exists for the type
 */
public static SingleFileHdfsReader getHdfsReader(ReaderType readerType, SystemStreamPartition systemStreamPartition) {
  switch (readerType) {
    case AVRO:
      return new AvroFileHdfsReader(systemStreamPartition);
    default:
      throw new SamzaException("Unsupported reader type: " + readerType);
  }
}
/** Writes the three avro fixture files (NUM_EVENTS records each) used by these tests. */
private void generateAvroDataFiles() throws Exception {
  for (String path : new String[] {AVRO_FILE_1, AVRO_FILE_2, AVRO_FILE_3}) {
    TestAvroFileHdfsReader.writeTestEventsToFile(path, NUM_EVENTS);
  }
}
/** The checkpoint string itself is the canonical textual form of this checkpoint. */
@Override
public String toString() {
  return getCheckpointStr();
}
}
/**
 * Encodes the current position (file index within the partition plus the
 * offset inside that file) as a single multi-file offset string.
 */
private String getCurOffset() {
  return generateOffset(curFileIndex, curSingleFileOffset);
}
/**
 * Parses a reader-type name (case-insensitive) into a {@link ReaderType}.
 *
 * @param readerTypeStr the reader type name, e.g. "avro"
 * @return the matching reader type
 * @throws SamzaException if the string does not name a known reader type
 */
public static ReaderType getType(String readerTypeStr) {
  try {
    // Use Locale.ROOT for locale-independent case mapping: under some default
    // locales (e.g. Turkish dotted/dotless i) toUpperCase() could otherwise
    // produce a string that fails the enum lookup.
    return ReaderType.valueOf(readerTypeStr.toUpperCase(java.util.Locale.ROOT));
  } catch (IllegalArgumentException e) {
    throw new SamzaException("Invalid hdfs reader type string: " + readerTypeStr, e);
  }
}
/**
 * Creates a reader over the set of files that make up one partition.
 *
 * @param readerType which single-file reader implementation to use
 * @param systemStreamPartition the partition this reader serves
 * @param partitionDescriptors paths of the files belonging to the partition; must be non-empty
 * @param offset multi-file offset to start reading from
 * @param numMaxRetries max reconnect attempts before giving up
 * @throws SamzaException if no partition descriptors are supplied
 */
public MultiFileHdfsReader(HdfsReaderFactory.ReaderType readerType,
    SystemStreamPartition systemStreamPartition, List<String> partitionDescriptors,
    String offset, int numMaxRetries) {
  // Fail fast before any instance state is assigned.
  if (partitionDescriptors.isEmpty()) {
    throw new SamzaException(
        "Invalid number of files based on partition descriptors: " + partitionDescriptors.size());
  }
  this.readerType = readerType;
  this.systemStreamPartition = systemStreamPartition;
  this.filePaths = partitionDescriptors;
  this.numMaxRetries = numMaxRetries;
  this.numRetries = 0;
  init(offset);
}
@Override public IncomingMessageEnvelope readNext() { // get checkpoint for THIS record String checkpoint = nextOffset(); GenericRecord record = fileReader.next(); if (fileReader.previousSync() != curBlockStart) { curBlockStart = fileReader.previousSync(); curRecordOffset = 0; } else { curRecordOffset++; } // avro schema doesn't necessarily have key field return new IncomingMessageEnvelope(systemStreamPartition, checkpoint, null, record); }
// Malformed offset strings (not valid checkpoint encodings — TODO confirm the
// expected format against AvroFileCheckpoint) should make the comparator throw.
@Test(expected = Exception.class)
public void testOffsetComparator_InvalidInput() {
  AvroFileHdfsReader.offsetComparator("1982,13", "1930,1");
}
}
/** One-time setup: writes the three avro fixture files these tests consume. */
@BeforeClass
public static void writeAvroEvents() throws Exception {
  for (String path : new String[] {AVRO_FILE_1, AVRO_FILE_2, AVRO_FILE_3}) {
    TestAvroFileHdfsReader.writeTestEventsToFile(path, NUM_EVENTS);
  }
}
/**
 * Returns the checkpoint string for the record the next {@code readNext} call
 * will return, encoded from the current block start and in-block record offset.
 */
@Override
public String nextOffset() {
  return AvroFileCheckpoint.generateCheckpointStr(curBlockStart, curRecordOffset);
}
/** One-time setup: writes the single avro fixture file these tests consume. */
@BeforeClass
public static void writeAvroEvents() throws Exception {
  writeTestEventsToFile(AVRO_FILE, NUM_EVENTS);
}