org.apache.crunch.PCollection.getPipeline Java code examples

/**
 * Delegates to the two-argument overload, supplying the configuration of the
 * pipeline that owns the given collection.
 *
 * @param pcollection the collection whose pipeline configuration is used
 * @return whatever the two-argument {@code getRecommendedPartitions} returns
 */
public static <T> int getRecommendedPartitions(PCollection<T> pcollection) {
  return getRecommendedPartitions(pcollection, pcollection.getPipeline().getConfiguration());
}

/**
 * Records the collection's name in the pipeline configuration under
 * {@code BloomFilterFn.CRUNCH_FILTER_NAME}, then wraps the values of the filter
 * table built by {@code createFilterTable} in a {@code FirstElementPObject}.
 *
 * @param collection the collection the filter is created from
 * @param filterFn the function handed to {@code createFilterTable}
 * @return a {@code PObject} over the values of the filter table
 */
public static <T> PObject<BloomFilter> createFilter(PCollection<T> collection, BloomFilterFn<T> filterFn) {
  // The configuration entry must be written before the filter table is built.
  collection.getPipeline()
      .getConfiguration()
      .set(BloomFilterFn.CRUNCH_FILTER_NAME, collection.getName());
  PCollection<BloomFilter> filters = createFilterTable(collection, filterFn).values();
  return new FirstElementPObject<BloomFilter>(filters);
}

  table.getName().getNameAsString(),
  regionLocator.getAllRegionLocations());
Path regionLocationFilePath = new Path(((DistributedPipeline) cells.getPipeline()).createTempPath(),
  "regionLocations" + table.getName().getNameAsString());
 writeRegionLocationTable(cells.getPipeline().getConfiguration(), regionLocationFilePath, regionLocationTable);

Configuration conf = cells.getPipeline().getConfiguration();
PTable<C, Void> t = cells.parallelDo(
  "Pre-partition",
 splitPoints = getSplitPoints(table);
Path partitionFile = new Path(((DistributedPipeline) cells.getPipeline()).createTempPath(), "partition");
writePartitionInfo(conf, partitionFile, splitPoints);
GroupingOptions options = GroupingOptions.builder()

Configuration conf = collection.getPipeline().getConfiguration();
GroupingOptions options = buildGroupingOptions(conf, tf, pType, columnOrders);
PTable<Tuple4<V1, V2, V3, V4>, Void> sortedPt = pt.groupByKey(options).ungroup();

Configuration conf = cells.getPipeline().getConfiguration();
PTable<C, Void> t = cells.parallelDo(
  "Pre-partition",
 splitPoints = getSplitPoints(regionLocator);
Path partitionFile = new Path(((DistributedPipeline) cells.getPipeline()).createTempPath(), "partition");
writePartitionInfo(conf, partitionFile, splitPoints);
GroupingOptions options = GroupingOptions.builder()

/**
 * Sorts a {@link PCollection} of {@link Tuple4}s according to the supplied
 * column orderings.
 *
 * <p>Every element is first emitted as the key of a pair with a {@code null}
 * value; the resulting table is grouped by key with the options built from
 * {@code columnOrders} and ungrouped; finally the keys are emitted again.
 *
 * @param collection the quads to sort
 * @param columnOrders the column orderings passed to {@code buildGroupingOptions}
 * @return a {@link PCollection} representing the sorted collection.
 */
public static <V1, V2, V3, V4> PCollection<Tuple4<V1, V2, V3, V4>> sortQuads(
    PCollection<Tuple4<V1, V2, V3, V4>> collection, ColumnOrder... columnOrders) {
  PTypeFamily family = collection.getTypeFamily();
  PType<Tuple4<V1, V2, V3, V4>> quadType = collection.getPType();

  // Table type: the quad itself as the key, paired with a null value.
  @SuppressWarnings("unchecked")
  PTableType<Tuple4<V1, V2, V3, V4>, Void> keyedType = family.tableOf(
      family.quads(
          quadType.getSubTypes().get(0),
          quadType.getSubTypes().get(1),
          quadType.getSubTypes().get(2),
          quadType.getSubTypes().get(3)),
      family.nulls());

  DoFn<Tuple4<V1, V2, V3, V4>, Pair<Tuple4<V1, V2, V3, V4>, Void>> toKey =
      new DoFn<Tuple4<V1, V2, V3, V4>, Pair<Tuple4<V1, V2, V3, V4>, Void>>() {
        @Override
        public void process(Tuple4<V1, V2, V3, V4> quad, Emitter<Pair<Tuple4<V1, V2, V3, V4>, Void>> out) {
          out.emit(Pair.of(quad, (Void) null));
        }
      };
  PTable<Tuple4<V1, V2, V3, V4>, Void> keyed = collection.parallelDo(toKey, keyedType);

  Configuration pipelineConf = collection.getPipeline().getConfiguration();
  GroupingOptions sortOptions = buildGroupingOptions(pipelineConf, family, quadType, columnOrders);

  // Group by key with the sort options, flatten again, then drop the null values.
  return keyed.groupByKey(sortOptions).ungroup().parallelDo(
      new DoFn<Pair<Tuple4<V1, V2, V3, V4>, Void>, Tuple4<V1, V2, V3, V4>>() {
        @Override
        public void process(Pair<Tuple4<V1, V2, V3, V4>, Void> entry, Emitter<Tuple4<V1, V2, V3, V4>> out) {
          out.emit(entry.first());
        }
      }, collection.getPType());
}

/**
 * Sorts a {@link PCollection} of {@link Tuple3}s according to the supplied
 * column orderings.
 *
 * <p>Every element is first emitted as the key of a pair with a {@code null}
 * value; the resulting table is grouped by key with the options built from
 * {@code columnOrders} and ungrouped; finally the keys are emitted again.
 *
 * @param collection the triples to sort
 * @param columnOrders the column orderings passed to {@code buildGroupingOptions}
 * @return a {@link PCollection} representing the sorted collection.
 */
public static <V1, V2, V3> PCollection<Tuple3<V1, V2, V3>> sortTriples(PCollection<Tuple3<V1, V2, V3>> collection,
    ColumnOrder... columnOrders) {
  PTypeFamily family = collection.getTypeFamily();
  PType<Tuple3<V1, V2, V3>> tripleType = collection.getPType();

  // Table type: the triple itself as the key, paired with a null value.
  @SuppressWarnings("unchecked")
  PTableType<Tuple3<V1, V2, V3>, Void> keyedType = family.tableOf(
      family.triples(
          tripleType.getSubTypes().get(0),
          tripleType.getSubTypes().get(1),
          tripleType.getSubTypes().get(2)),
      family.nulls());

  DoFn<Tuple3<V1, V2, V3>, Pair<Tuple3<V1, V2, V3>, Void>> toKey =
      new DoFn<Tuple3<V1, V2, V3>, Pair<Tuple3<V1, V2, V3>, Void>>() {
        @Override
        public void process(Tuple3<V1, V2, V3> triple, Emitter<Pair<Tuple3<V1, V2, V3>, Void>> out) {
          out.emit(Pair.of(triple, (Void) null));
        }
      };
  PTable<Tuple3<V1, V2, V3>, Void> keyed = collection.parallelDo(toKey, keyedType);

  Configuration pipelineConf = collection.getPipeline().getConfiguration();
  GroupingOptions sortOptions = buildGroupingOptions(pipelineConf, family, tripleType, columnOrders);

  // Group by key with the sort options, flatten again, then drop the null values.
  return keyed.groupByKey(sortOptions).ungroup().parallelDo(
      new DoFn<Pair<Tuple3<V1, V2, V3>, Void>, Tuple3<V1, V2, V3>>() {
        @Override
        public void process(Pair<Tuple3<V1, V2, V3>, Void> entry, Emitter<Tuple3<V1, V2, V3>> out) {
          out.emit(entry.first());
        }
      }, collection.getPType());
}

Configuration conf = collection.getPipeline().getConfiguration();
GroupingOptions options = buildGroupingOptions(conf, tf, pType, columnOrders);
PTable<Tuple3<V1, V2, V3>, Void> sortedPt = pt.groupByKey(options).ungroup();

Configuration conf = collection.getPipeline().getConfiguration();
GroupingOptions options = buildGroupingOptions(conf, tf, pType, columnOrders);
PTable<Pair<U, V>, Void> sortedPt = pt.groupByKey(options).ungroup();

/**
 * Sorts a {@code PCollection} of tuples using the specified column ordering
 * and a client-specified number of reducers.
 *
 * <p>A {@code SortFns.KeyExtraction} built from the collection's type and the
 * column orderings supplies both the keying function and the key type; the
 * keyed table is grouped with the options from {@code buildGroupingOptions},
 * ungrouped, and its values are returned.
 *
 * @param collection the tuples to sort
 * @param numReducers the reducer count passed to {@code buildGroupingOptions}
 * @param columnOrders the column orderings to sort by
 * @return a {@code PCollection} representing the sorted collection.
 */
public static <T extends Tuple> PCollection<T> sortTuples(PCollection<T> collection, int numReducers,
    ColumnOrder... columnOrders) {
  SortFns.KeyExtraction<T> extraction =
      new SortFns.KeyExtraction<T>(collection.getPType(), columnOrders);
  PTable<Object, T> keyed = collection.by(extraction.getByFn(), extraction.getKeyType());
  GroupingOptions sortOptions = buildGroupingOptions(
      keyed, collection.getPipeline().getConfiguration(), numReducers, columnOrders);
  return keyed.groupByKey(sortOptions).ungroup().values();
}

Configuration conf = collection.getPipeline().getConfiguration();
GroupingOptions options = buildGroupingOptions(conf, tf, pType, columnOrders);
PTable<Pair<U, V>, Void> sortedPt = pt.groupByKey(options).ungroup();

/**
 * Sorts the {@code PCollection} using the natural ordering of its elements, in
 * the given order, with the given number of reducers.
 *
 * <p>Elements are turned into keys of a table with {@code null} values in a
 * step named {@code "sort-pre"}; the table is grouped with the options from
 * {@code buildGroupingOptions}, ungrouped, and its keys are returned.
 *
 * @param collection the collection to sort
 * @param numReducers the reducer count passed to {@code buildGroupingOptions}
 * @param order the order passed to {@code buildGroupingOptions}
 * @return a {@code PCollection} representing the sorted collection.
 */
public static <T> PCollection<T> sort(PCollection<T> collection, int numReducers, Order order) {
  PTypeFamily family = collection.getTypeFamily();
  PTableType<T, Void> keyedType = family.tableOf(collection.getPType(), family.nulls());
  Configuration pipelineConf = collection.getPipeline().getConfiguration();

  DoFn<T, Pair<T, Void>> toKey = new DoFn<T, Pair<T, Void>>() {
    @Override
    public void process(T element, Emitter<Pair<T, Void>> out) {
      out.emit(Pair.of(element, (Void) null));
    }
  };
  PTable<T, Void> keyed = collection.parallelDo("sort-pre", toKey, keyedType);

  GroupingOptions sortOptions = buildGroupingOptions(keyed, pipelineConf, numReducers, order);
  return keyed.groupByKey(sortOptions).ungroup().keys();
}

Configuration conf = collection.getPipeline().getConfiguration();
GroupingOptions options = buildGroupingOptions(conf, tf, pType, columnOrders);
PTable<TupleN, Void> sortedPt = pt.groupByKey(options).ungroup();

/**
 * Sorts a {@link PCollection} of {@link TupleN}s according to the supplied
 * column orderings.
 *
 * <p>Every element is first emitted as the key of a pair with a {@code null}
 * value; the resulting table is grouped by key with the options built from
 * {@code columnOrders} and ungrouped; finally the keys are emitted again.
 *
 * @param collection the tuples to sort
 * @param columnOrders the column orderings passed to {@code buildGroupingOptions}
 * @return a {@link PCollection} representing the sorted collection.
 */
public static PCollection<TupleN> sortTuples(PCollection<TupleN> collection, ColumnOrder... columnOrders) {
  PTypeFamily family = collection.getTypeFamily();
  PType<TupleN> tupleType = collection.getPType();

  // Table type: the tuple itself as the key, paired with a null value.
  PTableType<TupleN, Void> keyedType = family.tableOf(
      family.tuples(tupleType.getSubTypes().toArray(new PType[0])),
      family.nulls());

  DoFn<TupleN, Pair<TupleN, Void>> toKey = new DoFn<TupleN, Pair<TupleN, Void>>() {
    @Override
    public void process(TupleN tuple, Emitter<Pair<TupleN, Void>> out) {
      out.emit(Pair.of(tuple, (Void) null));
    }
  };
  PTable<TupleN, Void> keyed = collection.parallelDo(toKey, keyedType);

  Configuration pipelineConf = collection.getPipeline().getConfiguration();
  GroupingOptions sortOptions = buildGroupingOptions(pipelineConf, family, tupleType, columnOrders);

  // Group by key with the sort options, flatten again, then drop the null values.
  return keyed.groupByKey(sortOptions).ungroup().parallelDo(
      new DoFn<Pair<TupleN, Void>, TupleN>() {
        @Override
        public void process(Pair<TupleN, Void> entry, Emitter<TupleN> out) {
          out.emit(entry.first());
        }
      }, collection.getPType());
}

/**
 * Sorts the {@link PCollection} using the natural ordering of its elements,
 * in the given order.
 *
 * <p>The grouping options are built first. Elements are then turned into keys
 * of a table with {@code null} values in a step named {@code "sort-pre"}, the
 * table is grouped and ungrouped, and a step named {@code "sort-post"} emits
 * the keys again.
 *
 * @param collection the collection to sort
 * @param order the order passed to {@code buildGroupingOptions}
 * @return a {@link PCollection} representing the sorted collection.
 */
public static <T> PCollection<T> sort(PCollection<T> collection, Order order) {
  PTypeFamily family = collection.getTypeFamily();
  PTableType<T, Void> keyedType = family.tableOf(collection.getPType(), family.nulls());
  Configuration pipelineConf = collection.getPipeline().getConfiguration();
  GroupingOptions sortOptions =
      buildGroupingOptions(pipelineConf, family, collection.getPType(), order);

  DoFn<T, Pair<T, Void>> toKey = new DoFn<T, Pair<T, Void>>() {
    @Override
    public void process(T element, Emitter<Pair<T, Void>> out) {
      out.emit(Pair.of(element, (Void) null));
    }
  };
  PTable<T, Void> keyed = collection.parallelDo("sort-pre", toKey, keyedType);

  // Group by key with the sort options, flatten again, then drop the null values.
  return keyed.groupByKey(sortOptions).ungroup().parallelDo(
      "sort-post",
      new DoFn<Pair<T, Void>, T>() {
        @Override
        public void process(Pair<T, Void> entry, Emitter<T> out) {
          out.emit(entry.first());
        }
      },
      collection.getPType());
}

/**
 * Sorts the {@link PCollection} using the natural ordering of its elements, in
 * the given order.
 *
 * <p>Three stages: a {@code "sort-pre"} step that pairs each element (as key)
 * with a {@code null} value, a group-by-key/ungroup using the options from
 * {@code buildGroupingOptions}, and a {@code "sort-post"} step that emits the
 * key of each pair.
 *
 * @param collection the collection to sort
 * @param order the order passed to {@code buildGroupingOptions}
 * @return a {@link PCollection} representing the sorted collection.
 */
public static <T> PCollection<T> sort(PCollection<T> collection, Order order) {
  PTypeFamily typeFamily = collection.getTypeFamily();
  PTableType<T, Void> pairType = typeFamily.tableOf(collection.getPType(), typeFamily.nulls());
  Configuration configuration = collection.getPipeline().getConfiguration();
  GroupingOptions groupingOptions =
      buildGroupingOptions(configuration, typeFamily, collection.getPType(), order);

  // Stage 1: key every element by itself with a null value.
  DoFn<T, Pair<T, Void>> wrap = new DoFn<T, Pair<T, Void>>() {
    @Override
    public void process(T value, Emitter<Pair<T, Void>> sink) {
      sink.emit(Pair.of(value, (Void) null));
    }
  };
  PTable<T, Void> wrapped = collection.parallelDo("sort-pre", wrap, pairType);

  // Stage 2: group by key with the sort options, then flatten again.
  PTable<T, Void> ordered = wrapped.groupByKey(groupingOptions).ungroup();

  // Stage 3: unwrap the keys.
  DoFn<Pair<T, Void>, T> unwrap = new DoFn<Pair<T, Void>, T>() {
    @Override
    public void process(Pair<T, Void> pair, Emitter<T> sink) {
      sink.emit(pair.first());
    }
  };
  return ordered.parallelDo("sort-post", unwrap, collection.getPType());
}

Javadoc

Returns the Pipeline associated with this PCollection.

Popular methods of PCollection

parallelDo
Applies the given doFn to the elements of this PCollection and returns a new PCollection that is the
getPType
Returns the PType of this PCollection.
by
Apply the given map function to each element of this instance in order to create a PTable.
write
Write the contents of this PCollection to the given Target, using the given Target.WriteMode to hand
materialize
Returns a reference to the data set represented by this PCollection that may be used by the client t
getTypeFamily
Returns the PTypeFamily of this PCollection.
count
Returns a PTable instance that contains the counts of each unique element of this PCollection.
aggregate
Returns a PCollection that contains the result of aggregating all values in this instance.
asReadable
cache
Marks this data as cached using the given CachingOptions. Cached PCollections will only be processed
filter
Apply the given filter function to this instance and return the resulting PCollection.
first

Popular in Java

Reactive rest calls using spring rest template
startActivity (Activity)
getOriginalFilename (MultipartFile)
Return the original filename in the client's filesystem. This may contain path information depending
putExtra (Intent)
EOFException (java.io)
Thrown when a program encounters the end of a file or stream during an input operation.
FileNotFoundException (java.io)
Thrown when a file specified by a program cannot be found.
SocketException (java.net)
This SocketException may be thrown during socket creation or setting options, and is the superclass
ZipFile (java.util.zip)
This class provides random read access to a zip file. You pay more to read the zip file's central di
HttpServlet (javax.servlet.http)
Provides an abstract class to be subclassed to create an HTTP servlet suitable for a Web site. A sub
IsNull (org.hamcrest.core)
Is the value null?
Top PhpStorm plugins

How to use the getPipeline method in org.apache.crunch.PCollection

Best Java code snippets using org.apache.crunch.PCollection.getPipeline (Showing top 17 results out of 315)

How to use
getPipeline
method
in
org.apache.crunch.PCollection