void prepare(Map<String, Object> conf, IMetricsContext metrics, int partitionIndex, int numPartitions) {
    this.options.prepare(conf, partitionIndex, numPartitions);
    initLastTxn(conf, partitionIndex);
}
    @Override
    public void updateState(HdfsState state, List<TridentTuple> tuples, TridentCollector collector) {
        state.updateState(tuples, collector);
    }
}
/**
 * For unit tests.
 */
void close() throws IOException {
    this.options.closeOutputFile();
}
public static StormTopology buildTopology(String hdfsUrl) {
    FixedBatchSpout spout = new FixedBatchSpout(new Fields("sentence", "key"), 1000,
            new Values("the cow jumped over the moon", 1L),
            new Values("the man went to the store and bought some candy", 2L),
            new Values("four score and seven years ago", 3L),
            new Values("how many apples can you eat", 4L),
            new Values("to be or not to be the person", 5L));
    spout.setCycle(true);

    TridentTopology topology = new TridentTopology();
    Stream stream = topology.newStream("spout1", spout);

    Fields hdfsFields = new Fields("sentence", "key");

    FileNameFormat fileNameFormat = new DefaultFileNameFormat()
            .withPath("/tmp/trident")
            .withPrefix("trident")
            .withExtension(".txt");

    RecordFormat recordFormat = new DelimitedRecordFormat()
            .withFields(hdfsFields);

    FileRotationPolicy rotationPolicy = new FileSizeRotationPolicy(5.0f, FileSizeRotationPolicy.Units.MB);

    HdfsState.Options options = new HdfsState.HdfsFileOptions()
            .withFileNameFormat(fileNameFormat)
            .withRecordFormat(recordFormat)
            .withRotationPolicy(rotationPolicy)
            .withFsUrl(hdfsUrl)
            .withConfigKey("hdfs.config");

    StateFactory factory = new HdfsStateFactory().withOptions(options);

    TridentState state = stream.partitionPersist(factory, hdfsFields, new HdfsUpdater(), new Fields());
    return topology.build();
}
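/*
 * A minimal sketch, not part of the original class, of how the topology built above
 * might be run in local mode. Assumptions: a Storm 2.x LocalCluster (AutoCloseable) is
 * on the classpath and the supplied URL points to a reachable namenode; a "file://"
 * URL can be used for purely local experiments.
 */
public static void main(String[] args) throws Exception {
    String hdfsUrl = args.length > 0 ? args[0] : "hdfs://localhost:8020"; // hypothetical default
    Config conf = new Config();
    conf.setMaxSpoutPending(5);
    try (LocalCluster cluster = new LocalCluster()) {
        cluster.submitTopology("hdfs-trident-file", conf, buildTopology(hdfsUrl));
        Thread.sleep(120_000); // let a few batches (and file rotations) happen, then shut down
    }
}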
private HdfsState createHdfsState() {
    Fields hdfsFields = new Fields("f1");
    RecordFormat recordFormat = new DelimitedRecordFormat().withFields(hdfsFields);
    FileRotationPolicy rotationPolicy = new FileSizeRotationPolicy(5.0f, FileSizeRotationPolicy.Units.MB);
    HdfsState.Options options = new HdfsState.HdfsFileOptions()
            .withFileNameFormat(fileNameFormat)
            .withRecordFormat(recordFormat)
            .withRotationPolicy(rotationPolicy)
            .withFsUrl("file://" + TEST_OUT_DIR);

    Map<String, Object> conf = new HashMap<>();
    conf.put(Config.TOPOLOGY_NAME, TEST_TOPOLOGY_NAME);

    HdfsState state = new HdfsState(options);
    state.prepare(conf, null, 0, 1);
    return state;
}
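/*
 * A sketch of the createMockTridentTuples helper the tests below rely on; this is an
 * assumed implementation, not the original. It uses Mockito to produce tuples whose
 * every field reads as "data", which is what the assertions in the tests expect to
 * find in the written lines.
 */
private List<TridentTuple> createMockTridentTuples(int count) {
    TridentTuple tuple = Mockito.mock(TridentTuple.class);
    Mockito.when(tuple.getValueByField(Mockito.anyString())).thenReturn("data");
    List<TridentTuple> tuples = new ArrayList<>();
    for (int i = 0; i < count; i++) {
        tuples.add(tuple);
    }
    return tuples;
}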
public static StormTopology buildTopology(String hdfsUrl) {
    FixedBatchSpout spout = new FixedBatchSpout(new Fields("sentence", "key"), 1000,
            new Values("the cow jumped over the moon", 1L),
            new Values("the man went to the store and bought some candy", 2L),
            new Values("four score and seven years ago", 3L),
            new Values("how many apples can you eat", 4L),
            new Values("to be or not to be the person", 5L));
    spout.setCycle(true);

    TridentTopology topology = new TridentTopology();
    Stream stream = topology.newStream("spout1", spout);

    Fields hdfsFields = new Fields("sentence", "key");

    FileNameFormat fileNameFormat = new DefaultFileNameFormat()
            .withPath("/tmp/trident")
            .withPrefix("trident")
            .withExtension(".seq");

    FileRotationPolicy rotationPolicy = new FileSizeRotationPolicy(5.0f, FileSizeRotationPolicy.Units.MB);

    HdfsState.Options seqOpts = new HdfsState.SequenceFileOptions()
            .withFileNameFormat(fileNameFormat)
            .withSequenceFormat(new DefaultSequenceFormat("key", "sentence"))
            .withRotationPolicy(rotationPolicy)
            .withFsUrl(hdfsUrl)
            .withConfigKey("hdfs.config")
            .addRotationAction(new MoveFileAction().toDestination("/tmp/dest2/"));

    StateFactory factory = new HdfsStateFactory().withOptions(seqOpts);

    TridentState state = stream.partitionPersist(factory, hdfsFields, new HdfsUpdater(), new Fields());
    return topology.build();
}
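/*
 * A minimal sketch, assumed rather than taken from the original sources, of reading back
 * the sequence files written by the topology above. DefaultSequenceFormat("key", "sentence")
 * is expected to write LongWritable keys and Text values; the actual file name depends on
 * the FileNameFormat and on rotation, so the path argument here is only illustrative.
 */
public static void dumpSequenceFile(org.apache.hadoop.conf.Configuration conf, String file) throws IOException {
    try (SequenceFile.Reader reader =
             new SequenceFile.Reader(conf, SequenceFile.Reader.file(new Path(file)))) {
        LongWritable key = new LongWritable();
        Text value = new Text();
        while (reader.next(key, value)) {
            System.out.println(key.get() + "\t" + value);
        }
    }
}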
@Test
public void testRecoverMultipleBatches() throws Exception {
    HdfsState state = createHdfsState();

    // batch sizes are illustrative; any distinct counts work
    int batch1Count = 10;
    int batch2Count = 20;
    int batch3Count = 30;
    int batch3ReplayCount = 50;

    // batch 1
    state.beginCommit(1L);
    state.updateState(createMockTridentTuples(batch1Count), null);
    state.commit(1L);

    // batch 2
    state.beginCommit(2L);
    state.updateState(createMockTridentTuples(batch2Count), null);
    state.commit(2L);

    // batch 3
    state.beginCommit(3L);
    state.updateState(createMockTridentTuples(batch3Count), null);
    state.commit(3L);

    // batch 3 is replayed; the original batch 3 tuples must be discarded
    state.beginCommit(3L);
    state.updateState(createMockTridentTuples(batch3ReplayCount), null);
    state.commit(3L);

    // close the state to force flush
    state.close();

    List<String> lines = getLinesFromCurrentDataFile();
    int preReplayCount = batch1Count + batch2Count + batch3Count;
    int expectedTupleCount = batch1Count + batch2Count + batch3ReplayCount;
    Assert.assertNotEquals(preReplayCount, lines.size());
    Assert.assertEquals(expectedTupleCount, lines.size());
}
    @Override
    public State makeState(Map<String, Object> conf, IMetricsContext metrics, int partitionIndex, int numPartitions) {
        LOG.info("makeState(partitionIndex={}, numPartitions={})", partitionIndex, numPartitions);
        HdfsState state = new HdfsState(this.options);
        state.prepare(conf, metrics, partitionIndex, numPartitions);
        return state;
    }
}
@Override
public void doCommit(Long txId) throws IOException {
    if (this.rotationPolicy.mark(this.writer.getLength())) {
        rotateOutputFile();
        this.rotationPolicy.reset();
    } else {
        this.writer.hsync();
    }
}
private void updateIndex(long txId) {
    LOG.debug("Starting index update.");
    final Path tmpPath = tmpFilePath(indexFilePath.toString());
    try (FSDataOutputStream out = this.options.fs.create(tmpPath, true);
         BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(out))) {
        TxnRecord txnRecord = new TxnRecord(txId, options.currentFile.toString(), this.options.getCurrentOffset());
        bw.write(txnRecord.toString());
        bw.newLine();
        bw.flush();
        /*
         * In non-error scenarios, close the output stream explicitly before the file
         * operations below. On the Azure Data Lake Store File System (adl://), deleting
         * a file also removes any open handles to it, so an out.close() issued
         * afterwards would fail.
         */
        out.close();
        /*
         * Delete the current index file and rename the tmp file to atomically
         * replace the index file. Orphan .tmp files are handled in getTxnRecord.
         */
        options.fs.delete(this.indexFilePath, false);
        options.fs.rename(tmpPath, this.indexFilePath);
        lastSeenTxn = txnRecord;
        LOG.debug("updateIndex updated lastSeenTxn to [{}]", this.lastSeenTxn);
    } catch (IOException e) {
        LOG.warn("Begin commit failed due to IOException. Failing batch", e);
        throw new FailedException(e);
    }
}
/**
 * Reads the last txn record from the index file if it exists; otherwise falls back
 * to the .tmp file if that exists.
 *
 * @param indexFilePath the index file path
 * @return the txn record from the index (or .tmp) file, or a default initial record
 * @throws IOException if the file system cannot be read
 */
private TxnRecord getTxnRecord(Path indexFilePath) throws IOException {
    Path tmpPath = tmpFilePath(indexFilePath.toString());
    if (this.options.fs.exists(indexFilePath)) {
        return readTxnRecord(indexFilePath);
    } else if (this.options.fs.exists(tmpPath)) {
        return readTxnRecord(tmpPath);
    }
    return new TxnRecord(0, options.currentFile.toString(), 0);
}
protected void rotateOutputFile() throws IOException {
    rotateOutputFile(true);
}
public void updateState(List<TridentTuple> tuples, TridentCollector tridentCollector) {
    try {
        this.options.execute(tuples);
    } catch (IOException e) {
        LOG.warn("Failing batch due to IOException.", e);
        throw new FailedException(e);
    }
}
@Test
public void testIndexFileCreation() throws Exception {
    HdfsState state = createHdfsState();
    state.beginCommit(1L);
    Collection<File> files = FileUtils.listFiles(new File(TEST_OUT_DIR), null, false);
    File hdfsIndexFile = Paths.get(TEST_OUT_DIR, INDEX_FILE_PREFIX + TEST_TOPOLOGY_NAME + ".0").toFile();
    Assert.assertTrue(files.contains(hdfsIndexFile));
}
@Override
void doRecover(Path srcPath, long nBytes) throws IOException {
    this.offset = 0;
    // copy the first nBytes of the recovered data file into the new output stream,
    // closing the input stream when done
    try (FSDataInputStream is = this.fs.open(srcPath)) {
        copyBytes(is, out, nBytes);
    }
    this.offset = nBytes;
}
@Override
public void beginCommit(Long txId) {
    if (txId <= lastSeenTxn.txnid) {
        LOG.info("txID {} is already processed, lastSeenTxn {}. Triggering recovery.", txId, lastSeenTxn);
        long start = System.currentTimeMillis();
        options.recover(lastSeenTxn.dataFilePath, lastSeenTxn.offset);
        LOG.info("Recovery took {} ms.", System.currentTimeMillis() - start);
    }
    updateIndex(txId);
}
@Override
public void commit(Long txId) {
    try {
        options.doCommit(txId);
    } catch (IOException e) {
        LOG.warn("Commit failed due to IOException. Failing the batch.", e);
        throw new FailedException(e);
    }
}
private List<String> getLinesFromCurrentDataFile() throws IOException {
    Path dataFile = Paths.get(TEST_OUT_DIR, fileNameFormat.getCurrentFileName());
    return Files.readAllLines(dataFile, Charset.defaultCharset());
}
@Test
public void testUpdateState() throws Exception {
    HdfsState state = createHdfsState();
    state.beginCommit(1L);
    int tupleCount = 100;
    state.updateState(createMockTridentTuples(tupleCount), null);
    state.commit(1L);
    state.close();

    List<String> lines = getLinesFromCurrentDataFile();
    List<String> expected = new ArrayList<>();
    for (int i = 0; i < tupleCount; i++) {
        expected.add("data");
    }
    Assert.assertEquals(tupleCount, lines.size());
    Assert.assertEquals(expected, lines);
}
@Test
public void testRecoverOneBatch() throws Exception {
    HdfsState state = createHdfsState();

    // batch 1 is played with 25 tuples initially.
    state.beginCommit(1L);
    state.updateState(createMockTridentTuples(25), null);

    // batch 1 is replayed with 50 tuples.
    int replayBatchSize = 50;
    state.beginCommit(1L);
    state.updateState(createMockTridentTuples(replayBatchSize), null);
    state.commit(1L);

    // close the state to force flush
    state.close();

    // Ensure that the original batch1 is discarded and new one is persisted.
    List<String> lines = getLinesFromCurrentDataFile();
    Assert.assertEquals(replayBatchSize, lines.size());
    List<String> expected = new ArrayList<>();
    for (int i = 0; i < replayBatchSize; i++) {
        expected.add("data");
    }
    Assert.assertEquals(expected, lines);
}