@Override
public Iterator<E> iterator() {
  if (materialized == null) {
    this.result = pipeline.run();
    if (result.succeeded() || !pipeline.getConfiguration().getBoolean("crunch.empty.materialize.on.failure", false)) {
      materialize();
    } else {
      LOG.error("Pipeline run failed, returning empty iterator");
      return Iterators.emptyIterator();
    }
  }
  return materialized.iterator();
}
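For context, this iterator is usually reached through Pipeline.materialize(...): the pipeline run is deferred until the returned Iterable is first consumed, which is what the materialized == null check above implements. A minimal usage sketch, with a hypothetical driver class and input path:

// Sketch: lazy materialization in Crunch. Nothing runs at materialize();
// the first call to iterator() below triggers pipeline.run().
Pipeline pipeline = new MRPipeline(Driver.class, new Configuration()); // Driver is hypothetical
PCollection<String> lines = pipeline.readTextFile("in.txt"); // hypothetical path
Iterable<String> it = pipeline.materialize(lines);
for (String line : it) { // first iteration runs the pipeline
  System.out.println(line);
}
pipeline.done();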
public MemExecution() {
  this.res = new PipelineResult(
      ImmutableList.of(new PipelineResult.StageResult("MemPipelineStage", COUNTERS)),
      PipelineExecution.Status.SUCCEEDED);
}
private String getJobInfo(PipelineResult job, boolean isVerbose) {
  StringBuilder buf = new StringBuilder();
  for (StageResult stage : job.getStageResults()) {
    buf.append("\nstageId: " + stage.getStageId() + ", stageName: " + stage.getStageName());
    if (isVerbose) {
      buf.append(", counters: ");
      Map<String, Set<String>> sortedCounterMap = new TreeMap<String, Set<String>>(stage.getCounterNames());
      for (Map.Entry<String, Set<String>> entry : sortedCounterMap.entrySet()) {
        String groupName = entry.getKey();
        buf.append("\n" + groupName);
        Set<String> sortedCounterNames = new TreeSet<String>(entry.getValue());
        for (String counterName : sortedCounterNames) {
          buf.append("\n " + counterName + " : " + stage.getCounterValue(groupName, counterName));
        }
      }
    }
  }
  return buf.toString();
}
private boolean done(Pipeline job, boolean isVerbose) {
  if (isVerbose) {
    job.enableDebug();
    job.getConfiguration().setBoolean("crunch.log.job.progress", true); // see class RuntimeParameters
  }
  String name = job.getName();
  LOG.debug("Running pipeline: " + name);
  pipelineResult = job.done();
  boolean success = pipelineResult.succeeded();
  if (success) {
    LOG.info("Succeeded with pipeline: " + name + " " + getJobInfo(pipelineResult, isVerbose));
  } else {
    LOG.error("Pipeline failed: " + name + " " + getJobInfo(pipelineResult, isVerbose));
  }
  return success;
}
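As the inline comment notes, Crunch collects these configuration keys in org.apache.crunch.impl.mr.run.RuntimeParameters. A sketch of the same toggle via the constant rather than the string literal; the exact field name (LOG_JOB_PROGRESS) is an assumption here:

// Assumption: RuntimeParameters.LOG_JOB_PROGRESS holds "crunch.log.job.progress";
// if so, this is equivalent to the string-literal call above.
job.getConfiguration().setBoolean(RuntimeParameters.LOG_JOB_PROGRESS, true);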
public PipelineResult execute() {
  try {
    Thread controlThread = new Thread(control);
    controlThread.start();
    while (!control.allFinished()) {
      Thread.sleep(1000);
    }
    control.stop();
  } catch (InterruptedException e) {
    LOG.info(e);
  }
  List<CrunchControlledJob> failures = control.getFailedJobList();
  if (!failures.isEmpty()) {
    System.err.println(failures.size() + " job failure(s) occurred:");
    for (CrunchControlledJob job : failures) {
      System.err.println(job.getJobName() + "(" + job.getJobID() + "): " + job.getMessage());
    }
  }
  List<PipelineResult.StageResult> stages = Lists.newArrayList();
  for (CrunchControlledJob job : control.getSuccessfulJobList()) {
    try {
      stages.add(new PipelineResult.StageResult(job.getJobName(), job.getJob().getCounters()));
    } catch (Exception e) {
      LOG.error("Exception thrown fetching job counters for stage: " + job.getJobName(), e);
    }
  }
  return new PipelineResult(stages);
}
StageResult sr = Iterables.getFirst(result.getStageResults(), null);
if (sr != null && MAP_INPUT_RECORDS != null) {
  this.count = sr.getCounterValue(MAP_INPUT_RECORDS);
}
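MAP_INPUT_RECORDS here is presumably Hadoop's built-in task counter (org.apache.hadoop.mapreduce.TaskCounter.MAP_INPUT_RECORDS); StageResult offers both an enum-keyed and a group/name-keyed lookup. A minimal sketch under that assumption:

// Sketch (assumption): MAP_INPUT_RECORDS is Hadoop's TaskCounter enum value.
// StageResult.getCounterValue(Enum<?>) derives the counter group and name
// from the enum's declaring class and constant name.
StageResult first = Iterables.getFirst(result.getStageResults(), null);
long inputRecords = (first == null)
    ? 0L
    : first.getCounterValue(TaskCounter.MAP_INPUT_RECORDS);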
return result.succeeded() ? 0 : 1;
public int run(String[] args) throws Exception {
  if (args.length != 1) {
    System.err.println();
    System.err.println("Usage: " + this.getClass().getName() + " [generic options] input");
    System.err.println();
    GenericOptionsParser.printGenericCommandUsage(System.err);
    return 1;
  }
  // Create an object to coordinate pipeline creation and execution.
  Pipeline pipeline = new MRPipeline(TotalWordCount.class, getConf());
  // Reference a given text file as a collection of Strings.
  PCollection<String> lines = pipeline.readTextFile(args[0]);
  // Map each line to the number of words it contains.
  PCollection<Long> numberOfWords = lines.parallelDo(new DoFn<String, Long>() {
    public void process(String line, Emitter<Long> emitter) {
      emitter.emit((long) line.split("\\s+").length);
    }
  }, Writables.longs()); // Indicates the serialization format
  // Sum the per-line counts and take the single resulting value as a PObject.
  PObject<Long> totalCount = numberOfWords.aggregate(Aggregators.SUM_LONGS()).first();
  // Execute the pipeline as a MapReduce.
  PipelineResult result = pipeline.run();
  System.out.println("Total number of words: " + totalCount.getValue());
  pipeline.done();
  return result.succeeded() ? 0 : 1;
}
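The run(String[]) methods in these examples follow Hadoop's Tool contract (note the getConf() calls), so a driver would typically launch them through ToolRunner, which also parses the [generic options] mentioned in the usage text. A minimal sketch of such an entry point:

// Sketch: standard Hadoop Tool entry point, assuming TotalWordCount
// extends Configured and implements Tool.
public static void main(String[] args) throws Exception {
  int exitCode = ToolRunner.run(new Configuration(), new TotalWordCount(), args);
  System.exit(exitCode);
}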
set(new PipelineResult(
    ImmutableList.of(new PipelineResult.StageResult("Spark", getCounters(), start, System.currentTimeMillis())),
if (result.succeeded()) {
  console.info("Added {} records to \"{}\"", task.getCount(), datasets.get(1));
status.set(Status.SUCCEEDED);
result = new PipelineResult(stages, status.get());
set(result);
public int run(String[] args) throws Exception {
  if (args.length != 2) {
    System.err.println();
    System.err.println("Two and only two arguments are accepted.");
    System.err.println("Usage: " + this.getClass().getName() + " [generic options] input output");
    System.err.println();
    GenericOptionsParser.printGenericCommandUsage(System.err);
    return 1;
  }
  // Create an object to coordinate pipeline creation and execution.
  Pipeline pipeline = new MRPipeline(TotalBytesByIP.class, getConf());
  // Reference a given text file as a collection of Strings.
  PCollection<String> lines = pipeline.readTextFile(args[0]);
  // Aggregator used for summing up response size.
  Aggregator<Long> agg = Aggregators.SUM_LONGS();
  // Table of (ip, sum(response size)).
  PTable<String, Long> ipAddrResponseSize = lines
      .parallelDo(extractIPResponseSize, Writables.tableOf(Writables.strings(), Writables.longs()))
      .groupByKey()
      .combineValues(agg);
  pipeline.writeTextFile(ipAddrResponseSize, args[1]);
  // Execute the pipeline as a MapReduce.
  PipelineResult result = pipeline.done();
  return result.succeeded() ? 0 : 1;
}
@Override
public int run() throws IOException {
  Preconditions.checkArgument(datasets.size() == 1, "Cannot compact multiple datasets");
  String uriOrName = datasets.get(0);
  View<Record> view = load(uriOrName, Record.class);
  if (isDatasetOrViewUri(uriOrName)) {
    Preconditions.checkArgument(viewMatches(view.getUri(), uriOrName),
        "Resolved view does not match requested view: " + view.getUri());
  }
  CompactionTask<Record> task = new CompactionTask<Record>(view);
  task.setConf(getConf());
  if (numWriters >= 0) {
    task.setNumWriters(numWriters);
  }
  if (filesPerPartition > 0) {
    task.setFilesPerPartition(filesPerPartition);
  }
  PipelineResult result = task.run();
  if (result.succeeded()) {
    console.info("Compacted {} records in \"{}\"", task.getCount(), uriOrName);
    return 0;
  } else {
    return 1;
  }
}
    Target.WriteMode.APPEND);
return run().succeeded() ? 0 : 1;
return pipeline.done().succeeded() ? 0 : 1;
public int run(String[] args) throws Exception {
  if (args.length != 2) {
    System.err.println();
    System.err.println("Two and only two arguments are accepted.");
    System.err.println("Usage: " + this.getClass().getName() + " [generic options] input output");
    System.err.println();
    GenericOptionsParser.printGenericCommandUsage(System.err);
    return 1;
  }
  // Create an object to coordinate pipeline creation and execution.
  Pipeline pipeline = new MRPipeline(AverageBytesByIP.class, getConf());
  // Reference a given text file as a collection of Strings.
  PCollection<String> lines = pipeline.readTextFile(args[0]);
  // Aggregator used for summing up response size and count.
  Aggregator<Pair<Long, Long>> agg = pairAggregator(SUM_LONGS(), SUM_LONGS());
  // Table of (ip, (sum(response size), count)).
  PTable<String, Pair<Long, Long>> remoteAddrResponseSize = lines
      .parallelDo(extractResponseSize,
          Writables.tableOf(Writables.strings(), Writables.pairs(Writables.longs(), Writables.longs())))
      .groupByKey()
      .combineValues(agg);
  // Calculate average response size by IP address.
  PTable<String, Double> avgs = remoteAddrResponseSize.parallelDo(calulateAverage,
      Writables.tableOf(Writables.strings(), Writables.doubles()));
  // Write the result to a text file.
  pipeline.writeTextFile(avgs, args[1]);
  // Execute the pipeline as a MapReduce.
  PipelineResult result = pipeline.done();
  return result.succeeded() ? 0 : 1;
}
if (result.succeeded()) {
  long count = task.getCount();
  if (count > 0) {