@Override
public Tuple<Collection<ExecutionLineageNode>, Collection<ChannelInstance>> evaluate(
        ChannelInstance[] inputs,
        ChannelInstance[] outputs,
        SparkExecutor sparkExecutor,
        OptimizationContext.OperatorContext operatorContext) {
    // Read either from the configured source path or from the single incoming FileChannel.
    final String sourcePath;
    if (this.sourcePath != null) {
        assert inputs.length == 0;
        sourcePath = this.sourcePath;
    } else {
        FileChannel.Instance input = (FileChannel.Instance) inputs[0];
        sourcePath = input.getSinglePath();
    }
    RddChannel.Instance output = (RddChannel.Instance) outputs[0];

    final String actualInputPath = FileSystems.findActualSingleInputPath(sourcePath);
    final JavaRDD<String> linesRdd = sparkExecutor.sc.textFile(actualInputPath);
    this.name(linesRdd);
    final JavaRDD<T> dataQuantaRdd = linesRdd
            .map(line -> {
                // TODO: Important. Enrich type information to create the correct parser!
                int tabPos = line.indexOf('\t');
                return (T) new Tuple2<>(
                        Integer.valueOf(line.substring(0, tabPos)),
                        Float.valueOf(line.substring(tabPos + 1)));
            });
    this.name(dataQuantaRdd);
    output.accept(dataQuantaRdd, sparkExecutor);

    return ExecutionOperator.modelLazyExecution(inputs, outputs, operatorContext);
}
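// Hedged sketch (not part of the original source): the parsing done in the map function
// above, isolated with a minimal stand-in pair type. It assumes every line has the shape
// "<int>\t<float>"; a line without a tab makes indexOf('\t') return -1 and the substring
// calls throw.
class TsvLineParseExample {
    static final class IntFloatPair {
        final int key;
        final float value;
        IntFloatPair(int key, float value) { this.key = key; this.value = value; }
    }

    static IntFloatPair parseTsvLine(String line) {
        int tabPos = line.indexOf('\t');
        return new IntFloatPair(
                Integer.parseInt(line.substring(0, tabPos)),
                Float.parseFloat(line.substring(tabPos + 1)));
    }

    public static void main(String[] args) {
        IntFloatPair pair = parseTsvLine("42\t0.25");
        System.out.println(pair.key + " -> " + pair.value); // prints: 42 -> 0.25
    }
}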
throws IOException {
    assert inputFileChannelInstance.wasProduced();

    final String inputPath = inputFileChannelInstance.getSinglePath();
    final String actualInputPath = FileSystems.findActualSingleInputPath(inputPath);
    final FileSystem inputFs = FileSystems.getFileSystem(inputPath).orElseThrow(
            // Reconstructed: the exception supplier was garbled in the source.
            () -> new RheemException(String.format("Could not identify filesystem of \"%s\".", inputPath))
    );

    // Reconstructed around the surviving configuration key: attach the operator's load
    // profile estimate and link the lineage to the consumed input channel.
    final ExecutionLineageNode mainExecutionLineage = new ExecutionLineageNode(operatorContext);
    mainExecutionLineage.add(LoadProfileEstimators.createFromSpecification(
            "rheem.graphchi.pagerank.load.main", configuration
    ));
    mainExecutionLineage.addPredecessor(inputFileChannelInstance.getLineage());
final String path = output.addGivenOrTempPath(this.targetPath, javaExecutor.getCompiler().getConfiguration());
final FileSystem fileSystem = FileSystems.getFileSystem(path).orElseThrow(
        () -> new IllegalStateException(String.format("No file system found for \"%s\".", this.targetPath))
);
@Override
public Tuple<Collection<ExecutionLineageNode>, Collection<ChannelInstance>> evaluate(
        ChannelInstance[] inputs,
        ChannelInstance[] outputs,
        JavaExecutor javaExecutor,
        OptimizationContext.OperatorContext operatorContext) {
    assert outputs.length == this.getNumOutputs();

    final String path;
    if (this.sourcePath == null) {
        final FileChannel.Instance input = (FileChannel.Instance) inputs[0];
        path = input.getSinglePath();
    } else {
        assert inputs.length == 0;
        path = this.sourcePath;
    }

    SequenceFileIterator<?> sequenceFileIterator;
    try {
        final String actualInputPath = FileSystems.findActualSingleInputPath(path);
        sequenceFileIterator = new SequenceFileIterator<>(actualInputPath);
        Stream<?> sequenceFileStream =
                StreamSupport.stream(Spliterators.spliteratorUnknownSize(sequenceFileIterator, 0), false);
        ((StreamChannel.Instance) outputs[0]).accept(sequenceFileStream);
    } catch (IOException e) {
        throw new RheemException(String.format("%s failed to read from %s.", this, path), e);
    }

    return ExecutionOperator.modelEagerExecution(inputs, outputs, operatorContext);
}
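// Hedged, self-contained sketch (not from the original source) of the Spliterators/StreamSupport
// pattern used above, shown on a plain Iterator. spliteratorUnknownSize is the right choice here
// because the number of records in the sequence file is not known up front; 0 means no
// spliterator characteristics, and false requests a sequential (non-parallel) stream.
import java.util.Arrays;
import java.util.Iterator;
import java.util.Spliterators;
import java.util.stream.Stream;
import java.util.stream.StreamSupport;

class IteratorToStreamExample {
    static <T> Stream<T> toStream(Iterator<T> iterator) {
        return StreamSupport.stream(Spliterators.spliteratorUnknownSize(iterator, 0), false);
    }

    public static void main(String[] args) {
        toStream(Arrays.asList("a", "b", "c").iterator()).forEach(System.out::println);
    }
}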
@Override
public Tuple<Collection<ExecutionLineageNode>, Collection<ChannelInstance>> evaluate(
        ChannelInstance[] inputs,
        ChannelInstance[] outputs,
        SparkExecutor sparkExecutor,
        OptimizationContext.OperatorContext operatorContext) {
    final String sourcePath;
    if (this.sourcePath != null) {
        assert inputs.length == 0;
        sourcePath = this.sourcePath;
    } else {
        FileChannel.Instance input = (FileChannel.Instance) inputs[0];
        sourcePath = input.getSinglePath();
    }
    RddChannel.Instance output = (RddChannel.Instance) outputs[0];

    final String actualInputPath = FileSystems.findActualSingleInputPath(sourcePath);
    final JavaRDD<Object> rdd = sparkExecutor.sc.objectFile(actualInputPath);
    this.name(rdd);
    output.accept(rdd, sparkExecutor);

    return ExecutionOperator.modelLazyExecution(inputs, outputs, operatorContext);
}
@Override
public Tuple<Collection<ExecutionLineageNode>, Collection<ChannelInstance>> evaluate(
        ChannelInstance[] inputs,
        ChannelInstance[] outputs,
        SparkExecutor sparkExecutor,
        OptimizationContext.OperatorContext operatorContext) {
    assert inputs.length == this.getNumInputs();

    final FileChannel.Instance output = (FileChannel.Instance) outputs[0];
    final String targetPath = output.addGivenOrTempPath(this.targetPath, sparkExecutor.getConfiguration());

    final RddChannel.Instance input = (RddChannel.Instance) inputs[0];
    final JavaRDD<Object> rdd = input.provideRdd();
    final JavaRDD<String> serializedRdd = rdd
            .map(dataQuantum -> {
                // TODO: Once there are more tuple types, make this generic.
                @SuppressWarnings("unchecked")
                Tuple2<Object, Object> tuple2 = (Tuple2<Object, Object>) dataQuantum;
                return String.valueOf(tuple2.field0) + '\t' + String.valueOf(tuple2.field1);
            });
    this.name(serializedRdd);
    serializedRdd
            .coalesce(1) // TODO: Allow more than one TSV file?
            .saveAsTextFile(targetPath);

    return ExecutionOperator.modelEagerExecution(inputs, outputs, operatorContext);
}
@Override
public Tuple<Collection<ExecutionLineageNode>, Collection<ChannelInstance>> evaluate(
        ChannelInstance[] inputs,
        ChannelInstance[] outputs,
        JavaExecutor javaExecutor,
        OptimizationContext.OperatorContext operatorContext) {
    assert outputs.length == this.getNumOutputs();

    final String path;
    if (this.sourcePath == null) {
        final FileChannel.Instance input = (FileChannel.Instance) inputs[0];
        path = input.getSinglePath();
    } else {
        assert inputs.length == 0;
        path = this.sourcePath;
    }
    final String actualInputPath = FileSystems.findActualSingleInputPath(path);
    Stream<T> stream = this.createStream(actualInputPath);
    ((StreamChannel.Instance) outputs[0]).accept(stream);

    return ExecutionOperator.modelLazyExecution(inputs, outputs, operatorContext);
}
@Override
public Tuple<Collection<ExecutionLineageNode>, Collection<ChannelInstance>> evaluate(
        ChannelInstance[] inputs,
        ChannelInstance[] outputs,
        SparkExecutor sparkExecutor,
        OptimizationContext.OperatorContext operatorContext) {
    assert inputs.length == this.getNumInputs();
    assert outputs.length <= 1;

    final FileChannel.Instance output = (FileChannel.Instance) outputs[0];
    final String targetPath = output.addGivenOrTempPath(this.targetPath, sparkExecutor.getConfiguration());

    RddChannel.Instance input = (RddChannel.Instance) inputs[0];
    input.provideRdd()
            .coalesce(1) // TODO: Remove. This only hotfixes the issue that JavaObjectFileSource reads only a single file.
            .saveAsObjectFile(targetPath);
    LoggerFactory.getLogger(this.getClass()).info("Writing dataset to {}.", targetPath);

    return ExecutionOperator.modelEagerExecution(inputs, outputs, operatorContext);
}
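// Hedged sketch (not from the original source): the plain-Spark round trip that pairs this
// sink with the objectFile source above. saveAsObjectFile writes a directory of serialized
// Java objects; objectFile reads it back. The path and the local[*] master are hypothetical,
// chosen only to make the example self-contained.
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;

import java.util.Arrays;

class ObjectFileRoundTrip {
    public static void main(String[] args) {
        JavaSparkContext sc = new JavaSparkContext(
                new SparkConf().setAppName("round-trip").setMaster("local[*]"));
        sc.parallelize(Arrays.asList(1, 2, 3))
                .coalesce(1) // single output file, mirroring the hotfix above
                .saveAsObjectFile("/tmp/example-object-file");
        JavaRDD<Object> restored = sc.objectFile("/tmp/example-object-file");
        System.out.println(restored.collect()); // [1, 2, 3]
        sc.stop();
    }
}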
@Override
public void doDispose() throws RheemException {
    Actions.doSafe(() -> {
        logger.info("Deleting file channel instances {}.", this.paths);
        final String path = this.getSinglePath();
        final Optional<FileSystem> fileSystemOptional = FileSystems.getFileSystem(path);
        fileSystemOptional.ifPresent(fs -> {
            try {
                fs.delete(path, true);
            } catch (IOException e) {
                throw new UncheckedIOException(e);
            }
        });
    });
}
@Override
public ChannelInstance createInstance(Executor executor,
                                      OptimizationContext.OperatorContext producerOperatorContext,
                                      int producerOutputIndex) {
    // NB: File channels are not inherent to a certain Platform and are therefore not tied to the executor.
    return new Instance(producerOperatorContext, producerOutputIndex);
}
public String addGivenOrTempPath(String pathOrNull, Configuration configuration) {
    final String path = pathOrNull == null ? this.generateTempPath(configuration) : pathOrNull;
    this.addPath(path);
    return path;
}
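// Hedged, self-contained illustration of the "given or temp" contract above (this is a
// stand-in class, not the Rheem API; the temp-path format is a hypothetical substitute for
// generateTempPath): an explicitly given path is registered and returned as-is, while a
// null path yields a freshly generated temporary path that is likewise registered.
import java.util.ArrayList;
import java.util.List;
import java.util.UUID;

class GivenOrTempPathExample {
    final List<String> paths = new ArrayList<>();

    String addGivenOrTempPath(String pathOrNull) {
        final String path = pathOrNull == null
                ? "file:///tmp/rheem-" + UUID.randomUUID() // stand-in for generateTempPath(...)
                : pathOrNull;
        this.paths.add(path);
        return path;
    }

    public static void main(String[] args) {
        GivenOrTempPathExample channel = new GivenOrTempPathExample();
        System.out.println(channel.addGivenOrTempPath("hdfs://namenode/out.seq")); // given path reused
        System.out.println(channel.addGivenOrTempPath(null));                      // temp path generated
    }
}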