@Override
public Iterator<E> iterator() {
  if (materialized == null) {
    this.result = pipeline.run();
    if (result.succeeded() || !pipeline.getConfiguration().getBoolean("crunch.empty.materialize.on.failure", false)) {
      materialize();
    } else {
      LOG.error("Pipeline run failed, returning empty iterator");
      return Iterators.emptyIterator();
    }
  }
  return materialized.iterator();
}
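For context, this iterator is usually reached through Pipeline.materialize(...): the pipeline run is deferred until the returned Iterable is first consumed, which is what the materialized == null check above implements. A minimal usage sketch, with a hypothetical driver class and input path:

// Sketch: lazy materialization in Crunch. Nothing runs at materialize();
// the first call to iterator() below triggers pipeline.run().
Pipeline pipeline = new MRPipeline(Driver.class, new Configuration()); // Driver is hypothetical
PCollection<String> lines = pipeline.readTextFile("in.txt"); // hypothetical path
Iterable<String> it = pipeline.materialize(lines);
for (String line : it) { // first iteration runs the pipeline
  System.out.println(line);
}
pipeline.done();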
public MemExecution() {
  this.res = new PipelineResult(
      ImmutableList.of(new PipelineResult.StageResult("MemPipelineStage", COUNTERS)),
      PipelineExecution.Status.SUCCEEDED);
}
private String getJobInfo(PipelineResult job, boolean isVerbose) {
  StringBuilder buf = new StringBuilder();
  for (StageResult stage : job.getStageResults()) {
    buf.append("\nstageId: " + stage.getStageId() + ", stageName: " + stage.getStageName());
    if (isVerbose) {
      buf.append(", counters: ");
      Map<String, Set<String>> sortedCounterMap = new TreeMap<String, Set<String>>(stage.getCounterNames());
      for (Map.Entry<String, Set<String>> entry : sortedCounterMap.entrySet()) {
        String groupName = entry.getKey();
        buf.append("\n" + groupName);
        Set<String> sortedCounterNames = new TreeSet<String>(entry.getValue());
        for (String counterName : sortedCounterNames) {
          buf.append("\n " + counterName + " : " + stage.getCounterValue(groupName, counterName));
        }
      }
    }
  }
  return buf.toString();
}
private boolean done(Pipeline job, boolean isVerbose) {
  if (isVerbose) {
    job.enableDebug();
    job.getConfiguration().setBoolean("crunch.log.job.progress", true); // see class RuntimeParameters
  }
  String name = job.getName();
  LOG.debug("Running pipeline: " + name);
  pipelineResult = job.done();
  boolean success = pipelineResult.succeeded();
  if (success) {
    LOG.info("Succeeded with pipeline: " + name + " " + getJobInfo(pipelineResult, isVerbose));
  } else {
    LOG.error("Pipeline failed: " + name + " " + getJobInfo(pipelineResult, isVerbose));
  }
  return success;
}
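As the inline comment notes, Crunch collects these configuration keys in org.apache.crunch.impl.mr.run.RuntimeParameters. A sketch of the same toggle via the constant rather than the string literal; the exact field name (LOG_JOB_PROGRESS) is an assumption here:

// Assumption: RuntimeParameters.LOG_JOB_PROGRESS holds "crunch.log.job.progress";
// if so, this is equivalent to the string-literal call above.
job.getConfiguration().setBoolean(RuntimeParameters.LOG_JOB_PROGRESS, true);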
public PipelineResult execute() {
  try {
    Thread controlThread = new Thread(control);
    controlThread.start();
    while (!control.allFinished()) {
      Thread.sleep(1000);
    }
    control.stop();
  } catch (InterruptedException e) {
    LOG.info(e);
  }
  List<CrunchControlledJob> failures = control.getFailedJobList();
  if (!failures.isEmpty()) {
    System.err.println(failures.size() + " job failure(s) occurred:");
    for (CrunchControlledJob job : failures) {
      System.err.println(job.getJobName() + "(" + job.getJobID() + "): " + job.getMessage());
    }
  }
  List<PipelineResult.StageResult> stages = Lists.newArrayList();
  for (CrunchControlledJob job : control.getSuccessfulJobList()) {
    try {
      stages.add(new PipelineResult.StageResult(job.getJobName(), job.getJob().getCounters()));
    } catch (Exception e) {
      LOG.error("Exception thrown fetching job counters for stage: " + job.getJobName(), e);
    }
  }
  return new PipelineResult(stages);
}
StageResult sr = Iterables.getFirst(result.getStageResults(), null);
if (sr != null && MAP_INPUT_RECORDS != null) {
  this.count = sr.getCounterValue(MAP_INPUT_RECORDS);
}
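MAP_INPUT_RECORDS here is presumably Hadoop's built-in task counter (org.apache.hadoop.mapreduce.TaskCounter.MAP_INPUT_RECORDS); StageResult offers both an enum-keyed and a group/name-keyed lookup. A minimal sketch under that assumption:

// Sketch (assumption): MAP_INPUT_RECORDS is Hadoop's TaskCounter enum value.
// StageResult.getCounterValue(Enum<?>) derives the counter group and name
// from the enum's declaring class and constant name.
StageResult first = Iterables.getFirst(result.getStageResults(), null);
long inputRecords = (first == null)
    ? 0L
    : first.getCounterValue(TaskCounter.MAP_INPUT_RECORDS);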
return result.succeeded() ? 0 : 1;
public int run(String[] args) throws Exception {
  if (args.length != 1) {
    System.err.println();
    System.err.println("Usage: " + this.getClass().getName() + " [generic options] input");
    System.err.println();
    GenericOptionsParser.printGenericCommandUsage(System.err);
    return 1;
  }
  // Create an object to coordinate pipeline creation and execution.
  Pipeline pipeline = new MRPipeline(TotalWordCount.class, getConf());
  // Reference a given text file as a collection of Strings.
  PCollection<String> lines = pipeline.readTextFile(args[0]);
  // Map each line to the number of words it contains.
  PCollection<Long> numberOfWords = lines.parallelDo(new DoFn<String, Long>() {
    public void process(String line, Emitter<Long> emitter) {
      emitter.emit((long) line.split("\\s+").length);
    }
  }, Writables.longs()); // Indicates the serialization format
  // Sum the per-line counts and take the single resulting value as a PObject.
  PObject<Long> totalCount = numberOfWords.aggregate(Aggregators.SUM_LONGS()).first();
  // Execute the pipeline as a MapReduce.
  PipelineResult result = pipeline.run();
  System.out.println("Total number of words: " + totalCount.getValue());
  pipeline.done();
  return result.succeeded() ? 0 : 1;
}
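The run(String[]) methods in these examples follow Hadoop's Tool contract (note the getConf() calls), so a driver would typically launch them through ToolRunner, which also parses the [generic options] mentioned in the usage text. A minimal sketch of such an entry point:

// Sketch: standard Hadoop Tool entry point, assuming TotalWordCount
// extends Configured and implements Tool.
public static void main(String[] args) throws Exception {
  int exitCode = ToolRunner.run(new Configuration(), new TotalWordCount(), args);
  System.exit(exitCode);
}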
set(new PipelineResult(
    ImmutableList.of(new PipelineResult.StageResult("Spark", getCounters(), start, System.currentTimeMillis())),
if (result.succeeded()) {
  console.info("Added {} records to \"{}\"", task.getCount(), datasets.get(1));
status.set(Status.SUCCEEDED);
result = new PipelineResult(stages, status.get());
set(result);
public int run(String[] args) throws Exception {
  if (args.length != 2) {
    System.err.println();
    System.err.println("Two and only two arguments are accepted.");
    System.err.println("Usage: " + this.getClass().getName() + " [generic options] input output");
    System.err.println();
    GenericOptionsParser.printGenericCommandUsage(System.err);
    return 1;
  }
  // Create an object to coordinate pipeline creation and execution.
  Pipeline pipeline = new MRPipeline(TotalBytesByIP.class, getConf());
  // Reference a given text file as a collection of Strings.
  PCollection<String> lines = pipeline.readTextFile(args[0]);
  // Aggregator used for summing up response size.
  Aggregator<Long> agg = Aggregators.SUM_LONGS();
  // Table of (ip, sum(response size)).
  PTable<String, Long> ipAddrResponseSize = lines
      .parallelDo(extractIPResponseSize, Writables.tableOf(Writables.strings(), Writables.longs()))
      .groupByKey()
      .combineValues(agg);
  pipeline.writeTextFile(ipAddrResponseSize, args[1]);
  // Execute the pipeline as a MapReduce.
  PipelineResult result = pipeline.done();
  return result.succeeded() ? 0 : 1;
}
@Override
public int run() throws IOException {
  Preconditions.checkArgument(datasets.size() == 1, "Cannot compact multiple datasets");
  String uriOrName = datasets.get(0);
  View<Record> view = load(uriOrName, Record.class);
  if (isDatasetOrViewUri(uriOrName)) {
    Preconditions.checkArgument(viewMatches(view.getUri(), uriOrName),
        "Resolved view does not match requested view: " + view.getUri());
  }
  CompactionTask<Record> task = new CompactionTask<Record>(view);
  task.setConf(getConf());
  if (numWriters >= 0) {
    task.setNumWriters(numWriters);
  }
  if (filesPerPartition > 0) {
    task.setFilesPerPartition(filesPerPartition);
  }
  PipelineResult result = task.run();
  if (result.succeeded()) {
    console.info("Compacted {} records in \"{}\"", task.getCount(), uriOrName);
    return 0;
  } else {
    return 1;
  }
}
    Target.WriteMode.APPEND);
return run().succeeded() ? 0 : 1;
return pipeline.done().succeeded() ? 0 : 1;
public int run(String[] args) throws Exception {
  if (args.length != 2) {
    System.err.println();
    System.err.println("Two and only two arguments are accepted.");
    System.err.println("Usage: " + this.getClass().getName() + " [generic options] input output");
    System.err.println();
    GenericOptionsParser.printGenericCommandUsage(System.err);
    return 1;
  }
  // Create an object to coordinate pipeline creation and execution.
  Pipeline pipeline = new MRPipeline(AverageBytesByIP.class, getConf());
  // Reference a given text file as a collection of Strings.
  PCollection<String> lines = pipeline.readTextFile(args[0]);
  // Aggregator used for summing up response size and count.
  Aggregator<Pair<Long, Long>> agg = pairAggregator(SUM_LONGS(), SUM_LONGS());
  // Table of (ip, (sum(response size), count)).
  PTable<String, Pair<Long, Long>> remoteAddrResponseSize = lines
      .parallelDo(extractResponseSize,
          Writables.tableOf(Writables.strings(), Writables.pairs(Writables.longs(), Writables.longs())))
      .groupByKey()
      .combineValues(agg);
  // Calculate average response size by IP address.
  PTable<String, Double> avgs = remoteAddrResponseSize.parallelDo(calulateAverage,
      Writables.tableOf(Writables.strings(), Writables.doubles()));
  // Write the result to a text file.
  pipeline.writeTextFile(avgs, args[1]);
  // Execute the pipeline as a MapReduce.
  PipelineResult result = pipeline.done();
  return result.succeeded() ? 0 : 1;
}
if (result.succeeded()) {
  long count = task.getCount();
  if (count > 0) {