/**
 * Looks up the {@link FileSystem} for the given URL via {@link #getFileSystem(String)} and insists
 * that one is found.
 *
 * @param fileUrl the URL for which a {@link FileSystem} is required
 * @return the {@link FileSystem} that was identified for {@code fileUrl}
 * @throws RheemException if no {@link FileSystem} could be identified
 */
public static FileSystem requireFileSystem(String fileUrl) {
    final Optional<FileSystem> candidate = getFileSystem(fileUrl);
    if (!candidate.isPresent()) {
        throw new RheemException(String.format("Could not identify filesystem for \"%s\".", fileUrl));
    }
    return candidate.get();
}
/**
 * Creates a sequential {@link Stream} over the lines of a file.
 *
 * @param path of the file
 * @return the {@link Stream} of lines
 * @throws IllegalStateException if no {@link FileSystem} is found for {@code path}
 * @throws RheemException        if creating the line iterator fails with an {@link IOException}
 */
private Stream<String> streamLines(String path) {
    final FileSystem fs = FileSystems.getFileSystem(path).orElseThrow(
            () -> new IllegalStateException(String.format("No file system found for %s", path))
    );

    // Only the iterator creation can raise the checked IOException, so keep the try block tight.
    final Iterator<String> lines;
    try {
        lines = this.createLineIterator(fs, path);
    } catch (IOException e) {
        throw new RheemException(String.format("%s failed to read %s.", this, path), e);
    }

    // Size is unknown up front and no spliterator characteristics are claimed (0).
    return StreamSupport.stream(Spliterators.spliteratorUnknownSize(lines, 0), false);
}
/**
 * Merges the given per-operator progress values into the progress tracked by this instance and
 * writes the result as JSON to {@code progressUrl}.
 * <p>The JSON object has two entries: {@code "overall"}, the integer average of all tracked
 * progress values (0 if nothing is tracked), and {@code "details"}, the tracked values themselves.</p>
 *
 * @param partialProgress maps operator names to their new progress values; overwrites existing entries
 * @throws IOException if no {@link FileSystem} can be found for the progress URL or writing fails
 */
@Override
public void updateProgress(HashMap<String, Integer> partialProgress) throws IOException {
    // Merge the update into the tracked progress.
    this.progress.putAll(partialProgress);

    // Average over all tracked operators (integer division, as before).
    int overall = 0;
    for (Integer operatorProgress : this.progress.values()) {
        overall += operatorProgress;
    }
    if (!this.progress.isEmpty()) {
        overall /= this.progress.size();
    }

    // Assemble the JSON before touching the file, so a failure here does not leave an empty file behind.
    HashMap<String, Object> progressBar = new HashMap<>();
    progressBar.put("overall", overall);
    progressBar.put("details", progress);
    final String jsonProgress = new JSONObject(progressBar).toString();

    // Fail with a descriptive IOException instead of a bare NoSuchElementException from Optional#get().
    final FileSystem progressFile = FileSystems.getFileSystem(progressUrl).orElseThrow(
            () -> new IOException(String.format("No file system found for %s.", progressUrl))
    );
    try (final OutputStreamWriter writer = new OutputStreamWriter(progressFile.create(progressUrl, true))) {
        writer.write(jsonProgress);
    } catch (UncheckedIOException e) {
        // Unwrap so that callers see the checked IOException declared by this method.
        throw e.getCause();
    }
}
}
/** * Systems such as Spark do not produce a single output file often times. That method tries to detect such * split object files to reassemble them correctly. As of now assumes either a Spark layout or a single file. * * @param ostensibleInputFile the path to that has been written using some framework; might be a dictionary * @return all actual input files */ public static Collection<String> findActualInputPaths(String ostensibleInputFile) { final Optional<FileSystem> fsOptional = getFileSystem(ostensibleInputFile); if (!fsOptional.isPresent()) { LoggerFactory.getLogger(FileSystems.class).warn("Could not inspect input file {}.", ostensibleInputFile); return Collections.singleton(ostensibleInputFile); } final FileSystem fs = fsOptional.get(); if (fs.isDirectory(ostensibleInputFile)) { final Collection<String> children = fs.listChildren(ostensibleInputFile); // Look for Spark-like directory structure. if (children.stream().anyMatch(child -> child.endsWith("_SUCCESS"))) { return children.stream().filter(child -> child.matches(".*/part-\\d+")).collect(Collectors.toList()); } else { throw new RheemException("Could not identify directory structure: " + children); } } return Collections.singleton(ostensibleInputFile); }
/**
 * Recursively deletes the file behind this instance's single path, provided that a
 * {@link FileSystem} can be found for it. The whole procedure is handed to {@code Actions.doSafe}.
 *
 * @throws RheemException as declared by the overridden method
 */
@Override
public void doDispose() throws RheemException {
    Actions.doSafe(() -> {
        logger.info("Deleting file channel instances {}.", this.paths);
        final String targetPath = this.getSinglePath();
        final Optional<FileSystem> resolvedFs = FileSystems.getFileSystem(targetPath);
        if (resolvedFs.isPresent()) {
            try {
                resolvedFs.get().delete(targetPath, true);
            } catch (IOException e) {
                // Surface the checked exception as an unchecked one.
                throw new UncheckedIOException(e);
            }
        }
    });
}
}
/**
 * Reads the configuration file at the given URL and applies its properties to this instance.
 *
 * @param configurationUrl URL to the configuration file
 * @throws RheemException if no {@link FileSystem} is found for the URL or if loading fails
 */
public void load(String configurationUrl) {
    final FileSystem configFs = FileSystems.getFileSystem(configurationUrl).orElseThrow(
            () -> new RheemException(String.format("Could not access %s.", configurationUrl))
    );

    try (InputStream in = configFs.open(configurationUrl)) {
        this.load(in);
    } catch (Exception e) {
        throw new RheemException(String.format("Could not load configuration from %s.", configurationUrl), e);
    }
}
/**
 * Sets up monitoring for a run: remembers the run ID and initial execution plan, derives the URLs of
 * the {@code execplan.json} and {@code progress.json} files below the configured base URL, writes the
 * execution plan as JSON and finally initializes the progress of every operator in the plan to 0.
 *
 * @param config               provides the base URL ({@code DEFAULT_MONITOR_BASE_URL_PROPERTY_KEY}, falling
 *                             back to {@code DEFAULT_MONITOR_BASE_URL})
 * @param runId                identifies the run; used as directory name below the base URL
 * @param initialExecutionPlan the stages of the execution plan; each stage is expected to provide a
 *                             list of operator maps under {@code "operators"}, each having a {@code "name"}
 * @throws IOException if no {@link FileSystem} can be found for the target URLs or writing fails
 */
@Override
public void initialize(Configuration config, String runId, List<Map> initialExecutionPlan) throws IOException {
    this.initialExecutionPlan = initialExecutionPlan;
    this.runId = runId;

    String runsDir = config.getStringProperty(DEFAULT_MONITOR_BASE_URL_PROPERTY_KEY, DEFAULT_MONITOR_BASE_URL);
    final String path = runsDir + "/" + runId;
    this.exPlanUrl = path + "/execplan.json";
    this.progressUrl = path + "/progress.json";

    // Assemble the JSON before touching the file, so a failure here does not leave an empty file behind.
    HashMap<String, Object> jsonPlanMap = new HashMap<>();
    jsonPlanMap.put("stages", initialExecutionPlan);
    jsonPlanMap.put("run_id", runId);
    final String jsonPlan = new JSONObject(jsonPlanMap).toString();

    // Fail with a descriptive IOException instead of a bare NoSuchElementException from Optional#get().
    final FileSystem execplanFile = FileSystems.getFileSystem(exPlanUrl).orElseThrow(
            () -> new IOException(String.format("No file system found for %s.", exPlanUrl))
    );
    try (final OutputStreamWriter writer = new OutputStreamWriter(execplanFile.create(exPlanUrl, true))) {
        writer.write(jsonPlan);
    } catch (UncheckedIOException e) {
        // Unwrap so that callers see the checked IOException declared by this method.
        throw e.getCause();
    }

    // Every operator of every stage starts out with a progress of 0.
    HashMap<String, Integer> initialProgress = new HashMap<>();
    for (Map stage : initialExecutionPlan) {
        for (Map operator : (List<Map>) stage.get("operators")) {
            initialProgress.put((String) operator.get("name"), 0);
        }
    }
    updateProgress(initialProgress);
}
/**
 * Determines the size of a file in bytes. Besides being a shortcut to
 * {@link FileSystem#getFileSize(String)}, this method remembers sizes it has already determined
 * in a cache and serves repeated requests from there.
 *
 * @param fileUrl the URL of the file
 * @return the number of bytes of the file, or an empty {@link OptionalLong} if it could not be determined
 */
public static OptionalLong getFileSize(String fileUrl) {
    // Serve from the cache if possible.
    if (fileSizeCache.containsKey(fileUrl)) {
        return OptionalLong.of(fileSizeCache.get(fileUrl));
    }

    final Optional<FileSystem> resolvedFs = FileSystems.getFileSystem(fileUrl);
    if (!resolvedFs.isPresent()) {
        return OptionalLong.empty();
    }

    try {
        final long numBytes = resolvedFs.get().getFileSize(fileUrl);
        fileSizeCache.put(fileUrl, numBytes);
        return OptionalLong.of(numBytes);
    } catch (FileNotFoundException e) {
        LOGGER.warn("Could not determine file size.", e);
        return OptionalLong.empty();
    }
}
// Resolve the file system for `path`; abort with an IllegalStateException if there is none.
// NOTE(review): the lookup uses `path`, but the error message prints `this.targetPath` - confirm that
// both always denote the same location, otherwise the message may name the wrong path.
final FileSystem fileSystem = FileSystems.getFileSystem(path).orElseThrow( () -> new IllegalStateException(String.format("No file system found for \"%s\".", this.targetPath)) );
final Optional<FileSystem> fileSystem = FileSystems.getFileSystem(TextFileSource.this.inputUrl); if (fileSystem.isPresent()) {
/**
 * Opens the (trimmed) input URL, feeds its lines as a {@link Stream} into the first output channel
 * and sets up the lineage nodes for the "prepare" and "main" load profiles.
 *
 * @param inputs          the input channels; their number must match {@code getNumInputs()}
 * @param outputs         the output channels; {@code outputs[0]} must be a {@code StreamChannel.Instance}
 * @param javaExecutor    provides the configuration for the load profile estimators
 * @param operatorContext the context the lineage nodes are created for
 * @return the result of {@code collectAndMark()} on the "prepare" lineage node
 * @throws RheemException if no file system is found for the URL or the file cannot be opened
 */
@Override
public Tuple<Collection<ExecutionLineageNode>, Collection<ChannelInstance>> evaluate(
        ChannelInstance[] inputs,
        ChannelInstance[] outputs,
        JavaExecutor javaExecutor,
        OptimizationContext.OperatorContext operatorContext) {
    assert inputs.length == this.getNumInputs();
    assert outputs.length == this.getNumOutputs();

    final String trimmedUrl = this.getInputUrl().trim();
    final FileSystem sourceFs = FileSystems.getFileSystem(trimmedUrl).orElseThrow(
            () -> new RheemException(String.format("Cannot access file system of %s.", trimmedUrl))
    );

    try {
        // NOTE(review): the input stream is not closed in this method; the lines are handed
        // to the output channel as a Stream - confirm who is responsible for closing it.
        final InputStream rawInput = sourceFs.open(trimmedUrl);
        final BufferedReader lineReader = new BufferedReader(new InputStreamReader(rawInput));
        final Stream<String> lineStream = lineReader.lines();
        ((StreamChannel.Instance) outputs[0]).accept(lineStream);
    } catch (IOException e) {
        throw new RheemException(String.format("Reading %s failed.", trimmedUrl), e);
    }

    // Lineage node for the preparation load profile.
    final ExecutionLineageNode prepareNode = new ExecutionLineageNode(operatorContext);
    prepareNode.add(LoadProfileEstimators.createFromSpecification(
            "rheem.java.textfilesource.load.prepare", javaExecutor.getConfiguration()
    ));

    // Lineage node for the main load profile; it becomes a predecessor of the output's lineage.
    final ExecutionLineageNode mainNode = new ExecutionLineageNode(operatorContext);
    mainNode.add(LoadProfileEstimators.createFromSpecification(
            "rheem.java.textfilesource.load.main", javaExecutor.getConfiguration()
    ));
    outputs[0].getLineage().addPredecessor(mainNode);

    return prepareNode.collectAndMark();
}
// Resolve the file system for `inputPath`; abort with a RheemException naming the path if there is none.
final FileSystem inputFs = FileSystems.getFileSystem(inputPath).orElseThrow( () -> new RheemException(String.format("Could not identify filesystem for \"%s\".", inputPath)) );
// Resolve the file system for `yamlUrl`; abort with a RheemException naming the URL if there is none.
final FileSystem fileSystem = FileSystems.getFileSystem(yamlUrl).orElseThrow( () -> new RheemException(String.format("No filesystem for %s.", yamlUrl)) );