// Open the file as UTF-8 text; the returned Iterator (body continues beyond this view)
// presumably yields the file's lines lazily — confirm against the enclosing method.
final BufferedReader reader = new BufferedReader(new InputStreamReader(fileSystem.open(path), "UTF-8"));
return new Iterator<String>() {
@Override public void updateProgress(HashMap<String, Integer> partialProgress) throws IOException { HashMap<String, Object> progressBar = new HashMap<>(); Integer overall = 0; for (String operatorName : partialProgress.keySet()) { this.progress.put(operatorName, partialProgress.get(operatorName)); } for (String operatorName: this.progress.keySet()) { overall = overall + this.progress.get(operatorName); } if (this.progress.size()>0) overall = overall/this.progress.size(); final FileSystem progressFile = FileSystems.getFileSystem(progressUrl).get(); try (final OutputStreamWriter writer = new OutputStreamWriter(progressFile.create(progressUrl, true))) { progressBar.put("overall", overall); progressBar.put("details", progress); JSONObject jsonProgress = new JSONObject(progressBar); writer.write(jsonProgress.toString()); } catch (UncheckedIOException e) { throw e.getCause(); } } }
/**
 * Looks up a registered {@link FileSystem} that can handle the given URL.
 *
 * @param fileUrl the URL whose file system should be resolved
 * @return some matching {@link FileSystem}, or an empty {@link Optional} if none can handle the URL
 */
public static Optional<FileSystem> getFileSystem(String fileUrl) {
    for (FileSystem candidate : registeredFileSystems) {
        if (candidate.canHandle(fileUrl)) {
            return Optional.of(candidate);
        }
    }
    return Optional.empty();
}
/** * Systems such as Spark do not produce a single output file often times. That method tries to detect such * split object files to reassemble them correctly. As of now assumes either a Spark layout or a single file. * * @param ostensibleInputFile the path to that has been written using some framework; might be a dictionary * @return all actual input files */ public static Collection<String> findActualInputPaths(String ostensibleInputFile) { final Optional<FileSystem> fsOptional = getFileSystem(ostensibleInputFile); if (!fsOptional.isPresent()) { LoggerFactory.getLogger(FileSystems.class).warn("Could not inspect input file {}.", ostensibleInputFile); return Collections.singleton(ostensibleInputFile); } final FileSystem fs = fsOptional.get(); if (fs.isDirectory(ostensibleInputFile)) { final Collection<String> children = fs.listChildren(ostensibleInputFile); // Look for Spark-like directory structure. if (children.stream().anyMatch(child -> child.endsWith("_SUCCESS"))) { return children.stream().filter(child -> child.matches(".*/part-\\d+")).collect(Collectors.toList()); } else { throw new RheemException("Could not identify directory structure: " + children); } } return Collections.singleton(ostensibleInputFile); }
String graphName = tempFile.toString();
// Heuristic: two base shards plus roughly one extra shard per 10 MB of input.
// NOTE(review): the cast binds tighter than '/', so "(int) inputFs.getFileSize(...)" narrows the
// long size BEFORE dividing — this overflows for files larger than 2 GiB. Confirm whether
// "(int) (getFileSize(...) / (10 * 1000000))" was intended.
final int numShards = 2 + (int) inputFs.getFileSize(actualInputPath) / (10 * 1000000);
if (!new File(ChiFilenames.getFilenameIntervals(graphName, numShards)).exists()) {
    // Intervals file absent: shard the input (edge-list format) before processing.
    FastSharder sharder = createSharder(graphName, numShards);
    final InputStream inputStream = inputFs.open(actualInputPath);
    sharder.shard(inputStream, FastSharder.GraphInputFormat.EDGELIST);
} else {
/**
 * Determine the number of bytes of a given file. This method is not only a short-cut to
 * {@link FileSystem#getFileSize(String)} but also caches file sizes for performance reasons.
 *
 * @param fileUrl the URL of the file
 * @return the number of bytes of the file if it could be determined
 */
public static OptionalLong getFileSize(String fileUrl) {
    // Single cache lookup instead of containsKey + get.
    final Long cachedFileSize = fileSizeCache.get(fileUrl);
    if (cachedFileSize != null) {
        return OptionalLong.of(cachedFileSize);
    }

    final Optional<FileSystem> fileSystem = FileSystems.getFileSystem(fileUrl);
    if (fileSystem.isPresent()) {
        try {
            final long fileSize = fileSystem.get().getFileSize(fileUrl);
            fileSizeCache.put(fileUrl, fileSize);
            return OptionalLong.of(fileSize);
        } catch (FileNotFoundException e) {
            // Best effort: an unknown size is represented by an empty result, not a failure.
            LOGGER.warn("Could not determine file size.", e);
        }
    }

    return OptionalLong.empty();
}
/**
 * Disposes this channel instance by (recursively) deleting its backing file, if any file
 * system can handle its path. Failures are funneled through {@link Actions#doSafe}.
 */
@Override
public void doDispose() throws RheemException {
    Actions.doSafe(() -> {
        logger.info("Deleting file channel instances {}.", this.paths);
        final String path = this.getSinglePath();
        FileSystems.getFileSystem(path).ifPresent(fileSystem -> {
            try {
                fileSystem.delete(path, true);
            } catch (IOException e) {
                // Re-wrap so the checked exception can escape the lambda.
                throw new UncheckedIOException(e);
            }
        });
    });
}
}
/**
 * Adjusts this instance to the properties specified in the given file.
 *
 * @param configurationUrl URL to the configuration file
 */
public void load(String configurationUrl) {
    final FileSystem fileSystem = FileSystems.getFileSystem(configurationUrl).orElseThrow(
            () -> new RheemException(String.format("Could not access %s.", configurationUrl))
    );
    try (InputStream configInputStream = fileSystem.open(configurationUrl)) {
        this.load(configInputStream);
    } catch (Exception e) {
        throw new RheemException(String.format("Could not load configuration from %s.", configurationUrl), e);
    }
}
/**
 * Initializes this monitor for the given run: persists the initial execution plan as JSON to
 * {@code <runsDir>/<runId>/execplan.json} and seeds the progress file with 0% for every operator.
 *
 * @param config               supplies the monitor base directory property
 * @param runId                identifier of this run; becomes part of the output paths
 * @param initialExecutionPlan the stages (each a {@code Map} with an "operators" list) to record
 * @throws IOException if the execution plan or progress file cannot be written
 */
@Override
public void initialize(Configuration config, String runId, List<Map> initialExecutionPlan) throws IOException {
    this.initialExecutionPlan = initialExecutionPlan;
    this.runId = runId;
    String runsDir = config.getStringProperty(DEFAULT_MONITOR_BASE_URL_PROPERTY_KEY, DEFAULT_MONITOR_BASE_URL);
    final String path = runsDir + "/" + runId;
    this.exPlanUrl = path + "/execplan.json";
    this.progressUrl = path + "/progress.json";

    // Fail with a descriptive message instead of a bare NoSuchElementException.
    final FileSystem execplanFile = FileSystems.getFileSystem(exPlanUrl)
            .orElseThrow(() -> new IllegalStateException("No file system found for " + exPlanUrl));
    // Write the JSON plan with an explicit charset (was platform default).
    try (final OutputStreamWriter writer = new OutputStreamWriter(execplanFile.create(exPlanUrl, true), "UTF-8")) {
        HashMap<String, Object> jsonPlanMap = new HashMap<>();
        jsonPlanMap.put("stages", initialExecutionPlan);
        jsonPlanMap.put("run_id", runId);
        JSONObject jsonPlan = new JSONObject(jsonPlanMap);
        writer.write(jsonPlan.toString());
    } catch (UncheckedIOException e) {
        // Unwrap so callers see the declared checked exception.
        throw e.getCause();
    }

    // Every operator starts at 0% progress.
    HashMap<String, Integer> initialProgress = new HashMap<>();
    for (Map stage : initialExecutionPlan) {
        for (Map operator : (List<Map>) stage.get("operators")) {
            initialProgress.put((String) operator.get("name"), 0);
        }
    }
    updateProgress(initialProgress);
}
// Read at most 1 MiB of the input — presumably to sample the file (e.g. for cardinality or
// line-length estimation); confirm against the enclosing method.
try (LimitedInputStream lis = new LimitedInputStream(fileSystem.get().open(TextFileSource.this.inputUrl), 1 * MiB)) {
    final BufferedReader bufferedReader = new BufferedReader(
            new InputStreamReader(lis, TextFileSource.this.encoding)
/**
 * Writes the single input channel to {@code this.textFileUrl}, one formatted data quantum per line.
 *
 * @param inputs          exactly one {@link JavaChannelInstance} providing the data
 * @param outputs         must be empty (this is a sink)
 * @param javaExecutor    supplies the compiler for the formatting descriptor
 * @param operatorContext context used for lineage modeling
 * @return eager-execution lineage/channel collections via {@link ExecutionOperator#modelEagerExecution}
 * @throws RheemException if writing fails
 */
@Override
public Tuple<Collection<ExecutionLineageNode>, Collection<ChannelInstance>> evaluate(
        ChannelInstance[] inputs,
        ChannelInstance[] outputs,
        JavaExecutor javaExecutor,
        OptimizationContext.OperatorContext operatorContext) {
    assert inputs.length == 1;
    assert outputs.length == 0;
    JavaChannelInstance input = (JavaChannelInstance) inputs[0];

    final FileSystem fs = FileSystems.requireFileSystem(this.textFileUrl);
    final Function<T, String> formatter = javaExecutor.getCompiler().compile(this.formattingDescriptor);

    // Encode explicitly as UTF-8 (was platform default), consistent with the rest of the code base.
    try (BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(fs.create(this.textFileUrl), "UTF-8"))) {
        input.<T>provideStream().forEach(
                dataQuantum -> {
                    try {
                        writer.write(formatter.apply(dataQuantum));
                        writer.write('\n');
                    } catch (IOException e) {
                        // Re-wrap so the checked exception can escape the lambda;
                        // it propagates to the caller as an unchecked exception.
                        throw new UncheckedIOException(e);
                    }
                }
        );
    } catch (IOException e) {
        throw new RheemException("Writing failed.", e);
    }
    return ExecutionOperator.modelEagerExecution(inputs, outputs, operatorContext);
}
/**
 * Opens {@code this.getInputUrl()} and feeds its lines lazily into the single output channel.
 *
 * @param inputs          input channel instances (none expected for a source)
 * @param outputs         exactly one {@link StreamChannel.Instance} that receives the line stream
 * @param javaExecutor    supplies the configuration for load estimation
 * @param operatorContext context used for lineage modeling
 * @return the collected-and-marked "prepare" lineage node
 * @throws RheemException if the file cannot be opened
 */
@Override
public Tuple<Collection<ExecutionLineageNode>, Collection<ChannelInstance>> evaluate(
        ChannelInstance[] inputs,
        ChannelInstance[] outputs,
        JavaExecutor javaExecutor,
        OptimizationContext.OperatorContext operatorContext) {
    assert inputs.length == this.getNumInputs();
    assert outputs.length == this.getNumOutputs();

    String url = this.getInputUrl().trim();
    FileSystem fs = FileSystems.getFileSystem(url).orElseThrow(
            () -> new RheemException(String.format("Cannot access file system of %s.", url))
    );

    try {
        final InputStream inputStream = fs.open(url);
        // Decode explicitly as UTF-8 (was platform default), consistent with the rest of the
        // code base. The stream is consumed lazily, so the reader is intentionally not closed here.
        Stream<String> lines = new BufferedReader(new InputStreamReader(inputStream, "UTF-8")).lines();
        ((StreamChannel.Instance) outputs[0]).accept(lines);
    } catch (IOException e) {
        throw new RheemException(String.format("Reading %s failed.", url), e);
    }

    ExecutionLineageNode prepareLineageNode = new ExecutionLineageNode(operatorContext);
    prepareLineageNode.add(LoadProfileEstimators.createFromSpecification(
            "rheem.java.textfilesource.load.prepare", javaExecutor.getConfiguration()
    ));
    ExecutionLineageNode mainLineageNode = new ExecutionLineageNode(operatorContext);
    mainLineageNode.add(LoadProfileEstimators.createFromSpecification(
            "rheem.java.textfilesource.load.main", javaExecutor.getConfiguration()
    ));

    outputs[0].getLineage().addPredecessor(mainLineageNode);

    return prepareLineageNode.collectAndMark();
}
// Continuation of a writer construction: the target path is created and wrapped
// with an explicit UTF-8 encoding.
fileSystem.create(path), "UTF-8"
);
Object yaml;
// NOTE(review): new Yaml().load(...) performs unrestricted deserialization and can instantiate
// arbitrary classes if the YAML comes from an untrusted source — confirm yamlUrl is trusted,
// or switch to a SafeConstructor-based loader.
try (final InputStream inputStream = fileSystem.open(yamlUrl)) {
    yaml = new Yaml().load(inputStream);
} catch (IOException e) {