private <OUT> DataStreamSource<OUT> createFileInput(FileInputFormat<OUT> inputFormat, TypeInformation<OUT> typeInfo, String sourceName, FileProcessingMode monitoringMode, long interval) { Preconditions.checkNotNull(inputFormat, "Unspecified file input format."); Preconditions.checkNotNull(typeInfo, "Unspecified output type information."); Preconditions.checkNotNull(sourceName, "Unspecified name for the source."); Preconditions.checkNotNull(monitoringMode, "Unspecified monitoring mode."); Preconditions.checkArgument(monitoringMode.equals(FileProcessingMode.PROCESS_ONCE) || interval >= ContinuousFileMonitoringFunction.MIN_MONITORING_INTERVAL, "The path monitoring interval cannot be less than " + ContinuousFileMonitoringFunction.MIN_MONITORING_INTERVAL + " ms."); ContinuousFileMonitoringFunction<OUT> monitoringFunction = new ContinuousFileMonitoringFunction<>(inputFormat, monitoringMode, getParallelism(), interval); ContinuousFileReaderOperator<OUT> reader = new ContinuousFileReaderOperator<>(inputFormat); SingleOutputStreamOperator<OUT> source = addSource(monitoringFunction, sourceName) .transform("Split Reader: " + sourceName, typeInfo, reader); return new DataStreamSource<>(source); }
@Override public void open() throws Exception { super.open(); checkState(this.reader == null, "The reader is already initialized."); checkState(this.serializer != null, "The serializer has not been set. " + "Probably the setOutputType() was not called. Please report it."); this.format.setRuntimeContext(getRuntimeContext()); this.format.configure(new Configuration()); this.checkpointLock = getContainingTask().getCheckpointLock(); // set the reader context based on the time characteristic final TimeCharacteristic timeCharacteristic = getOperatorConfig().getTimeCharacteristic(); final long watermarkInterval = getRuntimeContext().getExecutionConfig().getAutoWatermarkInterval(); this.readerContext = StreamSourceContexts.getSourceContext( timeCharacteristic, getProcessingTimeService(), checkpointLock, getContainingTask().getStreamStatusMaintainer(), output, watermarkInterval, -1); // and initialize the split reading thread this.reader = new SplitReader<>(format, serializer, readerContext, checkpointLock, restoredReaderState); this.restoredReaderState = null; this.reader.start(); }
"The operator state has not been properly initialized."); int subtaskIdx = getRuntimeContext().getIndexOfThisSubtask(); "state backend of operator " + getOperatorName() + '.', e);
private OneInputStreamOperatorTestHarness<TimestampedFileInputSplit, String> getTestHarness( BlockingFileInputFormat format, int noOfTasks, int taksIdx) throws Exception { ContinuousFileReaderOperator<String> reader = new ContinuousFileReaderOperator<>(format); reader.setOutputType(TypeExtractor.getInputFormatTypes(format), new ExecutionConfig()); OneInputStreamOperatorTestHarness<TimestampedFileInputSplit, String> testHarness = new OneInputStreamOperatorTestHarness<>(reader, 10, noOfTasks, taksIdx); testHarness.setTimeCharacteristic(TimeCharacteristic.EventTime); return testHarness; }
getClass().getSimpleName(), getRuntimeContext().getIndexOfThisSubtask()); long now = getProcessingTimeService().getCurrentProcessingTime(); long runningModTime = Math.max(now, noOfSplits + 1); TimestampedFileInputSplit currentSplit = createTimestampedFileSplit(currSplit, --runningModTime, formatState); restoredReaderState.add(currentSplit); for (FileInputSplit split : pendingSplits) { TimestampedFileInputSplit timestampedSplit = createTimestampedFileSplit(split, --runningModTime); restoredReaderState.add(timestampedSplit); LOG.debug("{} (taskIdx={}) restored {} splits from legacy: {}.", getClass().getSimpleName(), getRuntimeContext().getIndexOfThisSubtask(), restoredReaderState.size(), restoredReaderState);
@Override public void initializeState(StateInitializationContext context) throws Exception { super.initializeState(context); checkState(checkpointedState == null, "The reader state has already been initialized."); checkpointedState = context.getOperatorStateStore().getSerializableListState("splits"); int subtaskIdx = getRuntimeContext().getIndexOfThisSubtask(); if (context.isRestored()) { LOG.info("Restoring state for the {} (taskIdx={}).", getClass().getSimpleName(), subtaskIdx); // this may not be null in case we migrate from a previous Flink version. if (restoredReaderState == null) { restoredReaderState = new ArrayList<>(); for (TimestampedFileInputSplit split : checkpointedState.get()) { restoredReaderState.add(split); } if (LOG.isDebugEnabled()) { LOG.debug("{} (taskIdx={}) restored {}.", getClass().getSimpleName(), subtaskIdx, restoredReaderState); } } } else { LOG.info("No state to restore for the {} (taskIdx={}).", getClass().getSimpleName(), subtaskIdx); } }
@Override public void initializeState(StateInitializationContext context) throws Exception { super.initializeState(context); checkState(checkpointedState == null, "The reader state has already been initialized."); checkpointedState = context.getOperatorStateStore().getSerializableListState("splits"); int subtaskIdx = getRuntimeContext().getIndexOfThisSubtask(); if (context.isRestored()) { LOG.info("Restoring state for the {} (taskIdx={}).", getClass().getSimpleName(), subtaskIdx); // this may not be null in case we migrate from a previous Flink version. if (restoredReaderState == null) { restoredReaderState = new ArrayList<>(); for (TimestampedFileInputSplit split : checkpointedState.get()) { restoredReaderState.add(split); } if (LOG.isDebugEnabled()) { LOG.debug("{} (taskIdx={}) restored {}.", getClass().getSimpleName(), subtaskIdx, restoredReaderState); } } } else { LOG.info("No state to restore for the {} (taskIdx={}).", getClass().getSimpleName(), subtaskIdx); } }
@Override public void open() throws Exception { super.open(); checkState(this.reader == null, "The reader is already initialized."); checkState(this.serializer != null, "The serializer has not been set. " + "Probably the setOutputType() was not called. Please report it."); this.format.setRuntimeContext(getRuntimeContext()); this.format.configure(new Configuration()); this.checkpointLock = getContainingTask().getCheckpointLock(); // set the reader context based on the time characteristic final TimeCharacteristic timeCharacteristic = getOperatorConfig().getTimeCharacteristic(); final long watermarkInterval = getRuntimeContext().getExecutionConfig().getAutoWatermarkInterval(); this.readerContext = StreamSourceContexts.getSourceContext( timeCharacteristic, getProcessingTimeService(), checkpointLock, getContainingTask().getStreamStatusMaintainer(), output, watermarkInterval, -1); // and initialize the split reading thread this.reader = new SplitReader<>(format, serializer, readerContext, checkpointLock, restoredReaderState); this.restoredReaderState = null; this.reader.start(); }
@Override public void snapshotState(StateSnapshotContext context) throws Exception { super.snapshotState(context); checkState(checkpointedState != null, "The operator state has not been properly initialized."); int subtaskIdx = getRuntimeContext().getIndexOfThisSubtask(); checkpointedState.clear(); List<TimestampedFileInputSplit> readerState = reader.getReaderState(); try { for (TimestampedFileInputSplit split : readerState) { // create a new partition for each entry. checkpointedState.add(split); } } catch (Exception e) { checkpointedState.clear(); throw new Exception("Could not add timestamped file input splits to to operator " + "state backend of operator " + getOperatorName() + '.', e); } if (LOG.isDebugEnabled()) { LOG.debug("{} (taskIdx={}) checkpointed {} splits: {}.", getClass().getSimpleName(), subtaskIdx, readerState.size(), readerState); } }
private <OUT> DataStreamSource<OUT> createFileInput(FileInputFormat<OUT> inputFormat, TypeInformation<OUT> typeInfo, String sourceName, FileProcessingMode monitoringMode, long interval) { Preconditions.checkNotNull(inputFormat, "Unspecified file input format."); Preconditions.checkNotNull(typeInfo, "Unspecified output type information."); Preconditions.checkNotNull(sourceName, "Unspecified name for the source."); Preconditions.checkNotNull(monitoringMode, "Unspecified monitoring mode."); Preconditions.checkArgument(monitoringMode.equals(FileProcessingMode.PROCESS_ONCE) || interval >= ContinuousFileMonitoringFunction.MIN_MONITORING_INTERVAL, "The path monitoring interval cannot be less than " + ContinuousFileMonitoringFunction.MIN_MONITORING_INTERVAL + " ms."); ContinuousFileMonitoringFunction<OUT> monitoringFunction = new ContinuousFileMonitoringFunction<>(inputFormat, monitoringMode, getParallelism(), interval); ContinuousFileReaderOperator<OUT> reader = new ContinuousFileReaderOperator<>(inputFormat); SingleOutputStreamOperator<OUT> source = addSource(monitoringFunction, sourceName) .transform("Split Reader: " + sourceName, typeInfo, reader); return new DataStreamSource<>(source); }
@Override public void initializeState(StateInitializationContext context) throws Exception { super.initializeState(context); checkState(checkpointedState == null, "The reader state has already been initialized."); checkpointedState = context.getOperatorStateStore().getSerializableListState("splits"); int subtaskIdx = getRuntimeContext().getIndexOfThisSubtask(); if (context.isRestored()) { LOG.info("Restoring state for the {} (taskIdx={}).", getClass().getSimpleName(), subtaskIdx); // this may not be null in case we migrate from a previous Flink version. if (restoredReaderState == null) { restoredReaderState = new ArrayList<>(); for (TimestampedFileInputSplit split : checkpointedState.get()) { restoredReaderState.add(split); } if (LOG.isDebugEnabled()) { LOG.debug("{} (taskIdx={}) restored {}.", getClass().getSimpleName(), subtaskIdx, restoredReaderState); } } } else { LOG.info("No state to restore for the {} (taskIdx={}).", getClass().getSimpleName(), subtaskIdx); } }
@Override public void open() throws Exception { super.open(); checkState(this.reader == null, "The reader is already initialized."); checkState(this.serializer != null, "The serializer has not been set. " + "Probably the setOutputType() was not called. Please report it."); this.format.setRuntimeContext(getRuntimeContext()); this.format.configure(new Configuration()); this.checkpointLock = getContainingTask().getCheckpointLock(); // set the reader context based on the time characteristic final TimeCharacteristic timeCharacteristic = getOperatorConfig().getTimeCharacteristic(); final long watermarkInterval = getRuntimeContext().getExecutionConfig().getAutoWatermarkInterval(); this.readerContext = StreamSourceContexts.getSourceContext( timeCharacteristic, getProcessingTimeService(), checkpointLock, getContainingTask().getStreamStatusMaintainer(), output, watermarkInterval, -1); // and initialize the split reading thread this.reader = new SplitReader<>(format, serializer, readerContext, checkpointLock, restoredReaderState); this.restoredReaderState = null; this.reader.start(); }
"The operator state has not been properly initialized."); int subtaskIdx = getRuntimeContext().getIndexOfThisSubtask(); "state backend of operator " + getOperatorName() + '.', e);
private <OUT> DataStreamSource<OUT> createFileInput(FileInputFormat<OUT> inputFormat, TypeInformation<OUT> typeInfo, String sourceName, FileProcessingMode monitoringMode, long interval) { Preconditions.checkNotNull(inputFormat, "Unspecified file input format."); Preconditions.checkNotNull(typeInfo, "Unspecified output type information."); Preconditions.checkNotNull(sourceName, "Unspecified name for the source."); Preconditions.checkNotNull(monitoringMode, "Unspecified monitoring mode."); Preconditions.checkArgument(monitoringMode.equals(FileProcessingMode.PROCESS_ONCE) || interval >= ContinuousFileMonitoringFunction.MIN_MONITORING_INTERVAL, "The path monitoring interval cannot be less than " + ContinuousFileMonitoringFunction.MIN_MONITORING_INTERVAL + " ms."); ContinuousFileMonitoringFunction<OUT> monitoringFunction = new ContinuousFileMonitoringFunction<>(inputFormat, monitoringMode, getParallelism(), interval); ContinuousFileReaderOperator<OUT> reader = new ContinuousFileReaderOperator<>(inputFormat); SingleOutputStreamOperator<OUT> source = addSource(monitoringFunction, sourceName) .transform("Split Reader: " + sourceName, typeInfo, reader); return new DataStreamSource<>(source); }
@Override public void initializeState(StateInitializationContext context) throws Exception { super.initializeState(context); checkState(checkpointedState == null, "The reader state has already been initialized."); checkpointedState = context.getOperatorStateStore().getSerializableListState("splits"); int subtaskIdx = getRuntimeContext().getIndexOfThisSubtask(); if (context.isRestored()) { LOG.info("Restoring state for the {} (taskIdx={}).", getClass().getSimpleName(), subtaskIdx); // this may not be null in case we migrate from a previous Flink version. if (restoredReaderState == null) { restoredReaderState = new ArrayList<>(); for (TimestampedFileInputSplit split : checkpointedState.get()) { restoredReaderState.add(split); } if (LOG.isDebugEnabled()) { LOG.debug("{} (taskIdx={}) restored {}.", getClass().getSimpleName(), subtaskIdx, restoredReaderState); } } } else { LOG.info("No state to restore for the {} (taskIdx={}).", getClass().getSimpleName(), subtaskIdx); } }
@Override public void open() throws Exception { super.open(); checkState(this.reader == null, "The reader is already initialized."); checkState(this.serializer != null, "The serializer has not been set. " + "Probably the setOutputType() was not called. Please report it."); this.format.setRuntimeContext(getRuntimeContext()); this.format.configure(new Configuration()); this.checkpointLock = getContainingTask().getCheckpointLock(); // set the reader context based on the time characteristic final TimeCharacteristic timeCharacteristic = getOperatorConfig().getTimeCharacteristic(); final long watermarkInterval = getRuntimeContext().getExecutionConfig().getAutoWatermarkInterval(); this.readerContext = StreamSourceContexts.getSourceContext( timeCharacteristic, getProcessingTimeService(), checkpointLock, getContainingTask().getStreamStatusMaintainer(), output, watermarkInterval, -1); // and initialize the split reading thread this.reader = new SplitReader<>(format, serializer, readerContext, checkpointLock, restoredReaderState); this.restoredReaderState = null; this.reader.start(); }
"The operator state has not been properly initialized."); int subtaskIdx = getRuntimeContext().getIndexOfThisSubtask(); "state backend of operator " + getOperatorName() + '.', e);
private <OUT> DataStreamSource<OUT> createFileInput(FileInputFormat<OUT> inputFormat, TypeInformation<OUT> typeInfo, String sourceName, FileProcessingMode monitoringMode, long interval) { Preconditions.checkNotNull(inputFormat, "Unspecified file input format."); Preconditions.checkNotNull(typeInfo, "Unspecified output type information."); Preconditions.checkNotNull(sourceName, "Unspecified name for the source."); Preconditions.checkNotNull(monitoringMode, "Unspecified monitoring mode."); Preconditions.checkArgument(monitoringMode.equals(FileProcessingMode.PROCESS_ONCE) || interval >= ContinuousFileMonitoringFunction.MIN_MONITORING_INTERVAL, "The path monitoring interval cannot be less than " + ContinuousFileMonitoringFunction.MIN_MONITORING_INTERVAL + " ms."); ContinuousFileMonitoringFunction<OUT> monitoringFunction = new ContinuousFileMonitoringFunction<>(inputFormat, monitoringMode, getParallelism(), interval); ContinuousFileReaderOperator<OUT> reader = new ContinuousFileReaderOperator<>(inputFormat); SingleOutputStreamOperator<OUT> source = addSource(monitoringFunction, sourceName) .transform("Split Reader: " + sourceName, typeInfo, reader); return new DataStreamSource<>(source); }
private <OUT> DataStreamSource<OUT> createFileInput(FileInputFormat<OUT> inputFormat, TypeInformation<OUT> typeInfo, String sourceName, FileProcessingMode monitoringMode, long interval) { Preconditions.checkNotNull(inputFormat, "Unspecified file input format."); Preconditions.checkNotNull(typeInfo, "Unspecified output type information."); Preconditions.checkNotNull(sourceName, "Unspecified name for the source."); Preconditions.checkNotNull(monitoringMode, "Unspecified monitoring mode."); Preconditions.checkArgument(monitoringMode.equals(FileProcessingMode.PROCESS_ONCE) || interval >= ContinuousFileMonitoringFunction.MIN_MONITORING_INTERVAL, "The path monitoring interval cannot be less than " + ContinuousFileMonitoringFunction.MIN_MONITORING_INTERVAL + " ms."); ContinuousFileMonitoringFunction<OUT> monitoringFunction = new ContinuousFileMonitoringFunction<>(inputFormat, monitoringMode, getParallelism(), interval); ContinuousFileReaderOperator<OUT> reader = new ContinuousFileReaderOperator<>(inputFormat); SingleOutputStreamOperator<OUT> source = addSource(monitoringFunction, sourceName) .transform("Split Reader: " + sourceName, typeInfo, reader); return new DataStreamSource<>(source); }