/**
 * Retrieves the {@link FileSystem} that is responsible for the given URL.
 *
 * @param fileUrl URL whose {@link FileSystem} should be determined
 * @return the responsible {@link FileSystem}
 * @throws RheemException if no {@link FileSystem} could be identified
 */
public static FileSystem requireFileSystem(String fileUrl) {
    final Optional<FileSystem> fileSystemOptional = getFileSystem(fileUrl);
    if (!fileSystemOptional.isPresent()) {
        throw new RheemException(String.format("Could not identify filesystem for \"%s\".", fileUrl));
    }
    return fileSystemOptional.get();
}
/** * As {@link #findActualInputPaths(String)} but requires the presence of only a single input file. */ public static String findActualSingleInputPath(String ostensibleInputFile) { final Collection<String> inputPaths = FileSystems.findActualInputPaths(ostensibleInputFile); if (inputPaths.size() != 1) { throw new RheemException(String.format( "Illegal number of files for \"%s\": %s", ostensibleInputFile, inputPaths )); // TODO: Add support. } return inputPaths.iterator().next(); }
final String actualInputPath = FileSystems.findActualSingleInputPath(inputPath); final FileSystem inputFs = FileSystems.getFileSystem(inputPath).orElseThrow( () -> new RheemException(String.format("Could not identify filesystem for \"%s\".", inputPath)) );
/**
 * Reads a sequence file (either from the configured {@code sourcePath} or from an incoming
 * {@link FileChannel}) and feeds its records into the output {@link StreamChannel}.
 *
 * @throws RheemException if the file could not be read
 */
@Override
public Tuple<Collection<ExecutionLineageNode>, Collection<ChannelInstance>> evaluate(
        ChannelInstance[] inputs,
        ChannelInstance[] outputs,
        JavaExecutor javaExecutor,
        OptimizationContext.OperatorContext operatorContext) {
    assert outputs.length == this.getNumOutputs();

    // Determine the file to read: either the statically configured path or an incoming FileChannel.
    final String path;
    if (this.sourcePath == null) {
        final FileChannel.Instance input = (FileChannel.Instance) inputs[0];
        path = input.getSinglePath();
    } else {
        assert inputs.length == 0;
        path = this.sourcePath;
    }

    try {
        // Resolve potentially split output files (e.g., Spark "part-..." files) to the single actual file.
        final String actualInputPath = FileSystems.findActualSingleInputPath(path);
        // Fix: was declared as a raw type; parameterize to avoid unchecked-call warnings and
        // declare it at its use site instead of ahead of the branching logic.
        final SequenceFileIterator<?> sequenceFileIterator = new SequenceFileIterator<>(actualInputPath);
        final Stream<?> sequenceFileStream =
                StreamSupport.stream(Spliterators.spliteratorUnknownSize(sequenceFileIterator, 0), false);
        ((StreamChannel.Instance) outputs[0]).accept(sequenceFileStream);
    } catch (IOException e) {
        throw new RheemException(String.format("%s failed to read from %s.", this, path), e);
    }

    return ExecutionOperator.modelEagerExecution(inputs, outputs, operatorContext);
}
/**
 * Writes the incoming data quanta, one formatted line each, to {@code this.textFileUrl}.
 *
 * @throws RheemException if writing fails
 */
@Override
public Tuple<Collection<ExecutionLineageNode>, Collection<ChannelInstance>> evaluate(
        ChannelInstance[] inputs,
        ChannelInstance[] outputs,
        JavaExecutor javaExecutor,
        OptimizationContext.OperatorContext operatorContext) {
    assert inputs.length == 1;
    assert outputs.length == 0;

    JavaChannelInstance input = (JavaChannelInstance) inputs[0];
    final FileSystem fs = FileSystems.requireFileSystem(this.textFileUrl);
    final Function<T, String> formatter = javaExecutor.getCompiler().compile(this.formattingDescriptor);

    // Fix: use an explicit charset instead of the platform default so the produced files are
    // identical across machines; "UTF-8" is guaranteed to be supported by every JVM.
    try (BufferedWriter writer = new BufferedWriter(
            new OutputStreamWriter(fs.create(this.textFileUrl), "UTF-8"))) {
        input.<T>provideStream().forEach(
                dataQuantum -> {
                    try {
                        writer.write(formatter.apply(dataQuantum));
                        writer.write('\n');
                    } catch (IOException e) {
                        // Tunnel the checked exception out of the lambda.
                        throw new UncheckedIOException(e);
                    }
                }
        );
    } catch (IOException | UncheckedIOException e) {
        // Fix: also catch the tunneled UncheckedIOException, which previously escaped unwrapped.
        throw new RheemException("Writing failed.", e);
    }

    return ExecutionOperator.modelEagerExecution(inputs, outputs, operatorContext);
}
OptionalLong fileSize = FileSystems.getFileSize(TextFileSource.this.inputUrl); if (!fileSize.isPresent()) { TextFileSource.this.logger.warn("Could not determine size of {}... deliver fallback estimate.",
/**
 * Reads a text file into an RDD, parsing each line into a data quantum.
 */
@Override
public Tuple<Collection<ExecutionLineageNode>, Collection<ChannelInstance>> evaluate(
        ChannelInstance[] inputs,
        ChannelInstance[] outputs,
        SparkExecutor sparkExecutor,
        OptimizationContext.OperatorContext operatorContext) {
    // Determine the source path: either statically configured or delivered via a FileChannel.
    final String sourcePath;
    if (this.sourcePath != null) {
        assert inputs.length == 0;
        sourcePath = this.sourcePath;
    } else {
        FileChannel.Instance input = (FileChannel.Instance) inputs[0];
        sourcePath = input.getSinglePath();
    }
    RddChannel.Instance output = (RddChannel.Instance) outputs[0];

    // Resolve potentially split output files (e.g., Spark "part-..." files) to the single actual file.
    final String actualInputPath = FileSystems.findActualSingleInputPath(sourcePath);
    final JavaRDD<String> linesRdd = sparkExecutor.sc.textFile(actualInputPath);
    this.name(linesRdd);
    final JavaRDD<T> dataQuantaRdd = linesRdd
            .map(line -> { // TODO: Important. Enrich type informations to create the correct parser!
                // NOTE(review): hard-codes a "<int>\t<float>" line layout and unconditionally
                // casts the Tuple2 to T — any other schema will fail at runtime; confirm callers.
                int tabPos = line.indexOf('\t');
                return (T) new Tuple2<>(
                        Integer.valueOf(line.substring(0, tabPos)),
                        Float.valueOf(line.substring(tabPos + 1)));
            });
    this.name(dataQuantaRdd);
    output.accept(dataQuantaRdd, sparkExecutor);

    // RDD construction is lazy; model lazy execution for lineage accounting.
    return ExecutionOperator.modelLazyExecution(inputs, outputs, operatorContext);
}
/**
 * Creates a {@link Stream} of the lines of the given file.
 *
 * @param path of the file
 * @return the {@link Stream} of lines
 */
private Stream<String> streamLines(String path) {
    final Optional<FileSystem> fileSystemOptional = FileSystems.getFileSystem(path);
    if (!fileSystemOptional.isPresent()) {
        throw new IllegalStateException(String.format("No file system found for %s", path));
    }
    try {
        final Iterator<String> lineIterator = this.createLineIterator(fileSystemOptional.get(), path);
        return StreamSupport.stream(Spliterators.spliteratorUnknownSize(lineIterator, 0), false);
    } catch (IOException e) {
        throw new RheemException(String.format("%s failed to read %s.", this, path), e);
    }
}
/**
 * Streams the data quanta of a file (configured path or incoming {@link FileChannel})
 * into the output {@link StreamChannel}.
 */
@Override
public Tuple<Collection<ExecutionLineageNode>, Collection<ChannelInstance>> evaluate(
        ChannelInstance[] inputs,
        ChannelInstance[] outputs,
        JavaExecutor javaExecutor,
        OptimizationContext.OperatorContext operatorContext) {
    assert outputs.length == this.getNumOutputs();

    // Pick the statically configured path or the one handed in via a FileChannel.
    final String path;
    if (this.sourcePath != null) {
        assert inputs.length == 0;
        path = this.sourcePath;
    } else {
        path = ((FileChannel.Instance) inputs[0]).getSinglePath();
    }

    // Resolve potentially split output files (e.g., Spark "part-..." files) to the single actual file.
    final String actualInputPath = FileSystems.findActualSingleInputPath(path);
    final Stream<T> stream = this.createStream(actualInputPath);
    ((StreamChannel.Instance) outputs[0]).accept(stream);

    // The Stream is consumed lazily downstream.
    return ExecutionOperator.modelLazyExecution(inputs, outputs, operatorContext);
}
/**
 * Merges the given partial progress into the accumulated per-operator progress and writes
 * the result (overall average plus details) as JSON to {@code this.progressUrl}.
 *
 * @param partialProgress progress values per operator name to merge in
 * @throws IOException if the progress file could not be written
 */
@Override
public void updateProgress(HashMap<String, Integer> partialProgress) throws IOException {
    // Merge the new partial values into the accumulated progress.
    this.progress.putAll(partialProgress);

    // Average all per-operator progress values into an overall figure (integer division,
    // as in the original calculation); use a primitive to avoid per-iteration boxing.
    int overall = 0;
    for (Integer operatorProgress : this.progress.values()) {
        overall += operatorProgress;
    }
    if (!this.progress.isEmpty()) {
        overall /= this.progress.size();
    }

    // Fix: fail with a descriptive message instead of a bare NoSuchElementException
    // when the progress URL's file system cannot be identified.
    final FileSystem progressFile = FileSystems.getFileSystem(progressUrl)
            .orElseThrow(() -> new IllegalStateException(
                    String.format("No file system found for %s", progressUrl)));
    try (final OutputStreamWriter writer = new OutputStreamWriter(progressFile.create(progressUrl, true))) {
        HashMap<String, Object> progressBar = new HashMap<>();
        progressBar.put("overall", overall);
        progressBar.put("details", progress);
        JSONObject jsonProgress = new JSONObject(progressBar);
        writer.write(jsonProgress.toString());
    } catch (UncheckedIOException e) {
        // Unwrap the tunneled I/O failure to honor this method's checked signature.
        throw e.getCause();
    }
}
}
/**
 * Loads an object file into an RDD and forwards it to the output {@link RddChannel}.
 */
@Override
public Tuple<Collection<ExecutionLineageNode>, Collection<ChannelInstance>> evaluate(
        ChannelInstance[] inputs,
        ChannelInstance[] outputs,
        SparkExecutor sparkExecutor,
        OptimizationContext.OperatorContext operatorContext) {
    // Pick the statically configured path or the one delivered via the incoming FileChannel.
    final String sourcePath;
    if (this.sourcePath == null) {
        sourcePath = ((FileChannel.Instance) inputs[0]).getSinglePath();
    } else {
        assert inputs.length == 0;
        sourcePath = this.sourcePath;
    }
    final RddChannel.Instance output = (RddChannel.Instance) outputs[0];

    // Resolve potentially split output files (e.g., Spark "part-..." files) to the single actual file.
    final String actualInputPath = FileSystems.findActualSingleInputPath(sourcePath);
    final JavaRDD<Object> rdd = sparkExecutor.sc.objectFile(actualInputPath);
    this.name(rdd);
    output.accept(rdd, sparkExecutor);

    // RDD construction is lazy; model lazy execution for lineage accounting.
    return ExecutionOperator.modelLazyExecution(inputs, outputs, operatorContext);
}
/** * Systems such as Spark do not produce a single output file often times. That method tries to detect such * split object files to reassemble them correctly. As of now assumes either a Spark layout or a single file. * * @param ostensibleInputFile the path to that has been written using some framework; might be a dictionary * @return all actual input files */ public static Collection<String> findActualInputPaths(String ostensibleInputFile) { final Optional<FileSystem> fsOptional = getFileSystem(ostensibleInputFile); if (!fsOptional.isPresent()) { LoggerFactory.getLogger(FileSystems.class).warn("Could not inspect input file {}.", ostensibleInputFile); return Collections.singleton(ostensibleInputFile); } final FileSystem fs = fsOptional.get(); if (fs.isDirectory(ostensibleInputFile)) { final Collection<String> children = fs.listChildren(ostensibleInputFile); // Look for Spark-like directory structure. if (children.stream().anyMatch(child -> child.endsWith("_SUCCESS"))) { return children.stream().filter(child -> child.matches(".*/part-\\d+")).collect(Collectors.toList()); } else { throw new RheemException("Could not identify directory structure: " + children); } } return Collections.singleton(ostensibleInputFile); }
/**
 * Best-effort deletion of the file(s) backing this channel instance.
 */
@Override
public void doDispose() throws RheemException {
    Actions.doSafe(() -> {
        logger.info("Deleting file channel instances {}.", this.paths);
        final String path = this.getSinglePath();
        // If no file system is responsible for the path, there is nothing to delete.
        FileSystems.getFileSystem(path).ifPresent(fs -> {
            try {
                fs.delete(path, true);
            } catch (IOException e) {
                // Tunnel the checked exception out of the lambda; Actions.doSafe handles it.
                throw new UncheckedIOException(e);
            }
        });
    });
}
}
/**
 * Initializes this monitor: publishes the initial execution plan as JSON and seeds the
 * per-operator progress with zeros.
 *
 * @param config               provides the monitor base URL
 * @param runId                identifies this run; used as the directory name
 * @param initialExecutionPlan the execution plan to be published
 * @throws IOException if the execution plan or progress file could not be written
 */
@Override
public void initialize(Configuration config, String runId, List<Map> initialExecutionPlan) throws IOException {
    this.initialExecutionPlan = initialExecutionPlan;
    this.runId = runId;

    String runsDir = config.getStringProperty(DEFAULT_MONITOR_BASE_URL_PROPERTY_KEY, DEFAULT_MONITOR_BASE_URL);
    final String path = runsDir + "/" + runId;
    this.exPlanUrl = path + "/execplan.json";
    this.progressUrl = path + "/progress.json";

    // Fix: fail with a descriptive message instead of a bare NoSuchElementException
    // when the plan URL's file system cannot be identified.
    final FileSystem execplanFile = FileSystems.getFileSystem(exPlanUrl)
            .orElseThrow(() -> new IllegalStateException(
                    String.format("No file system found for %s", exPlanUrl)));
    try (final OutputStreamWriter writer = new OutputStreamWriter(execplanFile.create(exPlanUrl, true))) {
        HashMap<String, Object> jsonPlanMap = new HashMap<>();
        jsonPlanMap.put("stages", initialExecutionPlan);
        jsonPlanMap.put("run_id", runId);
        JSONObject jsonPlan = new JSONObject(jsonPlanMap);
        writer.write(jsonPlan.toString());
    } catch (UncheckedIOException e) {
        // Unwrap the tunneled I/O failure to honor this method's checked signature.
        throw e.getCause();
    }

    // Seed every operator's progress with 0 so that later updates can average over all of them.
    HashMap<String, Integer> initialProgress = new HashMap<>();
    for (Map stage : initialExecutionPlan) {
        // NOTE(review): assumes each stage map carries an "operators" list of maps with a
        // "name" entry — confirm against the producer of the execution plan.
        @SuppressWarnings("unchecked")
        List<Map> operators = (List<Map>) stage.get("operators");
        for (Map operator : operators) {
            initialProgress.put((String) operator.get("name"), 0);
        }
    }
    updateProgress(initialProgress);
}
/**
 * Adjusts this instance to the properties specified in the given file.
 *
 * @param configurationUrl URL to the configuration file
 */
public void load(String configurationUrl) {
    final FileSystem fileSystem = FileSystems.getFileSystem(configurationUrl)
            .orElseThrow(() -> new RheemException(String.format("Could not access %s.", configurationUrl)));
    try (InputStream configInputStream = fileSystem.open(configurationUrl)) {
        this.load(configInputStream);
    } catch (Exception e) {
        throw new RheemException(String.format("Could not load configuration from %s.", configurationUrl), e);
    }
}
/**
 * Determine the number of bytes of a given file. This method is not only a short-cut to
 * {@link FileSystem#getFileSize(String)} but also caches file sizes for performance reasons.
 *
 * @param fileUrl the URL of the file
 * @return the number of bytes of the file if it could be determined
 */
public static OptionalLong getFileSize(String fileUrl) {
    // Fix: single cache lookup instead of containsKey+get — avoids a second hash lookup
    // and a check-then-act race if the cache is modified concurrently.
    final Long cachedFileSize = fileSizeCache.get(fileUrl);
    if (cachedFileSize != null) {
        return OptionalLong.of(cachedFileSize);
    }
    final Optional<FileSystem> fileSystem = FileSystems.getFileSystem(fileUrl);
    if (fileSystem.isPresent()) {
        try {
            final long fileSize = fileSystem.get().getFileSize(fileUrl);
            fileSizeCache.put(fileUrl, fileSize);
            return OptionalLong.of(fileSize);
        } catch (FileNotFoundException e) {
            // Best effort: a missing file merely means the size cannot be determined.
            LOGGER.warn("Could not determine file size.", e);
        }
    }
    return OptionalLong.empty();
}
final FileSystem fileSystem = FileSystems.getFileSystem(path).orElseThrow( () -> new IllegalStateException(String.format("No file system found for \"%s\".", this.targetPath)) );
final Optional<FileSystem> fileSystem = FileSystems.getFileSystem(TextFileSource.this.inputUrl); if (fileSystem.isPresent()) {
/**
 * Opens the text file at this source's input URL and feeds its lines into the output
 * {@link StreamChannel}; registers preparation (eager) and main (lazy) lineage nodes.
 *
 * @throws RheemException if the file system cannot be accessed or the file cannot be opened
 */
@Override
public Tuple<Collection<ExecutionLineageNode>, Collection<ChannelInstance>> evaluate(
        ChannelInstance[] inputs,
        ChannelInstance[] outputs,
        JavaExecutor javaExecutor,
        OptimizationContext.OperatorContext operatorContext) {
    assert inputs.length == this.getNumInputs();
    assert outputs.length == this.getNumOutputs();

    String url = this.getInputUrl().trim();
    FileSystem fs = FileSystems.getFileSystem(url).orElseThrow(
            () -> new RheemException(String.format("Cannot access file system of %s.", url))
    );

    try {
        // The line Stream is consumed lazily downstream, so the InputStream must remain open here.
        final InputStream inputStream = fs.open(url);
        // Fix: decode with an explicit charset instead of the platform default so results do
        // not vary across machines; "UTF-8" is guaranteed to be supported by every JVM.
        Stream<String> lines = new BufferedReader(new InputStreamReader(inputStream, "UTF-8")).lines();
        ((StreamChannel.Instance) outputs[0]).accept(lines);
    } catch (IOException e) {
        throw new RheemException(String.format("Reading %s failed.", url), e);
    }

    // Account eagerly for the preparation work and lazily for the main line-processing work.
    ExecutionLineageNode prepareLineageNode = new ExecutionLineageNode(operatorContext);
    prepareLineageNode.add(LoadProfileEstimators.createFromSpecification(
            "rheem.java.textfilesource.load.prepare", javaExecutor.getConfiguration()
    ));
    ExecutionLineageNode mainLineageNode = new ExecutionLineageNode(operatorContext);
    mainLineageNode.add(LoadProfileEstimators.createFromSpecification(
            "rheem.java.textfilesource.load.main", javaExecutor.getConfiguration()
    ));
    outputs[0].getLineage().addPredecessor(mainLineageNode);

    return prepareLineageNode.collectAndMark();
}
final FileSystem fileSystem = FileSystems.getFileSystem(yamlUrl).orElseThrow( () -> new RheemException(String.format("No filesystem for %s.", yamlUrl)) );