Tabnine Logo
PCollection.getPipeline
Code Index | Add Tabnine to your IDE (free)

How to use the getPipeline method in org.apache.crunch.PCollection

Best Java code snippets using org.apache.crunch.PCollection.getPipeline (Showing top 17 results out of 315)

origin: org.apache.crunch/crunch-core

/**
 * Computes the recommended number of partitions for the given collection,
 * using the configuration of the pipeline the collection belongs to.
 * Delegates to the two-argument overload of this method.
 *
 * @param pcollection the collection to compute a partition count for
 * @return the value returned by the two-argument overload
 */
public static <T> int getRecommendedPartitions(PCollection<T> pcollection) {
 return getRecommendedPartitions(pcollection, pcollection.getPipeline().getConfiguration());
}
origin: apache/crunch

/**
 * Builds a {@link PObject} wrapping the bloom filter created for the given
 * collection.
 * <p>
 * The collection's name is first stored in the pipeline configuration under
 * {@code BloomFilterFn.CRUNCH_FILTER_NAME}; the result is then a
 * {@code FirstElementPObject} over the values of the table returned by
 * {@code createFilterTable}.
 *
 * @param collection the collection the filter is created from
 * @param filterFn the function handed to {@code createFilterTable}
 * @return a {@link PObject} over the values of the filter table
 */
public static <T> PObject<BloomFilter> createFilter(PCollection<T> collection, BloomFilterFn<T> filterFn) {
 collection.getPipeline()
   .getConfiguration()
   .set(BloomFilterFn.CRUNCH_FILTER_NAME, collection.getName());
 PCollection<BloomFilter> filters = createFilterTable(collection, filterFn).values();
 return new FirstElementPObject<BloomFilter>(filters);
}
origin: apache/crunch

  table.getName().getNameAsString(),
  regionLocator.getAllRegionLocations());
Path regionLocationFilePath = new Path(((DistributedPipeline) cells.getPipeline()).createTempPath(),
  "regionLocations" + table.getName().getNameAsString());
 writeRegionLocationTable(cells.getPipeline().getConfiguration(), regionLocationFilePath, regionLocationTable);
origin: org.apache.crunch/crunch-hbase

Configuration conf = cells.getPipeline().getConfiguration();
PTable<C, Void> t = cells.parallelDo(
  "Pre-partition",
 splitPoints = getSplitPoints(table);
Path partitionFile = new Path(((DistributedPipeline) cells.getPipeline()).createTempPath(), "partition");
writePartitionInfo(conf, partitionFile, splitPoints);
GroupingOptions options = GroupingOptions.builder()
origin: cloudera/crunch

Configuration conf = collection.getPipeline().getConfiguration();
GroupingOptions options = buildGroupingOptions(conf, tf, pType, columnOrders);
PTable<Tuple4<V1, V2, V3, V4>, Void> sortedPt = pt.groupByKey(options).ungroup();
origin: apache/crunch

Configuration conf = cells.getPipeline().getConfiguration();
PTable<C, Void> t = cells.parallelDo(
  "Pre-partition",
 splitPoints = getSplitPoints(regionLocator);
Path partitionFile = new Path(((DistributedPipeline) cells.getPipeline()).createTempPath(), "partition");
writePartitionInfo(conf, partitionFile, splitPoints);
GroupingOptions options = GroupingOptions.builder()
origin: org.apache.crunch/crunch

/**
 * Orders a {@link PCollection} of {@link Tuple4}s according to the supplied
 * column orderings.
 * <p>
 * Each element is emitted as the key of a table with {@code null} values; that
 * table is grouped by key with options built from the pipeline configuration
 * and the column orderings, ungrouped, and the keys are emitted again.
 * 
 * @param collection the quads to sort
 * @param columnOrders the column orderings handed to {@code buildGroupingOptions}
 * @return a {@link PCollection} representing the sorted collection.
 */
public static <V1, V2, V3, V4> PCollection<Tuple4<V1, V2, V3, V4>> sortQuads(
  PCollection<Tuple4<V1, V2, V3, V4>> collection, ColumnOrder... columnOrders) {
 PTypeFamily family = collection.getTypeFamily();
 PType<Tuple4<V1, V2, V3, V4>> quadType = collection.getPType();
 @SuppressWarnings("unchecked")
 PTableType<Tuple4<V1, V2, V3, V4>, Void> keyedType = family.tableOf(
   family.quads(
     quadType.getSubTypes().get(0),
     quadType.getSubTypes().get(1),
     quadType.getSubTypes().get(2),
     quadType.getSubTypes().get(3)),
   family.nulls());
 // Move every quad into the key position so the grouping step can order it.
 PTable<Tuple4<V1, V2, V3, V4>, Void> keyed = collection.parallelDo(
   new DoFn<Tuple4<V1, V2, V3, V4>, Pair<Tuple4<V1, V2, V3, V4>, Void>>() {
    @Override
    public void process(Tuple4<V1, V2, V3, V4> quad, Emitter<Pair<Tuple4<V1, V2, V3, V4>, Void>> out) {
     out.emit(Pair.of(quad, (Void) null));
    }
   }, keyedType);
 GroupingOptions sortOptions = buildGroupingOptions(
   collection.getPipeline().getConfiguration(), family, quadType, columnOrders);
 // Strip the null values again, leaving only the quads.
 return keyed.groupByKey(sortOptions).ungroup().parallelDo(
   new DoFn<Pair<Tuple4<V1, V2, V3, V4>, Void>, Tuple4<V1, V2, V3, V4>>() {
    @Override
    public void process(Pair<Tuple4<V1, V2, V3, V4>, Void> entry, Emitter<Tuple4<V1, V2, V3, V4>> out) {
     out.emit(entry.first());
    }
   }, collection.getPType());
}
origin: org.apache.crunch/crunch

/**
 * Orders a {@link PCollection} of {@link Tuple3}s according to the supplied
 * column orderings.
 * <p>
 * Each element is emitted as the key of a table with {@code null} values; that
 * table is grouped by key with options built from the pipeline configuration
 * and the column orderings, ungrouped, and the keys are emitted again.
 * 
 * @param collection the triples to sort
 * @param columnOrders the column orderings handed to {@code buildGroupingOptions}
 * @return a {@link PCollection} representing the sorted collection.
 */
public static <V1, V2, V3> PCollection<Tuple3<V1, V2, V3>> sortTriples(PCollection<Tuple3<V1, V2, V3>> collection,
  ColumnOrder... columnOrders) {
 PTypeFamily family = collection.getTypeFamily();
 PType<Tuple3<V1, V2, V3>> tripleType = collection.getPType();
 @SuppressWarnings("unchecked")
 PTableType<Tuple3<V1, V2, V3>, Void> keyedType = family.tableOf(
   family.triples(
     tripleType.getSubTypes().get(0),
     tripleType.getSubTypes().get(1),
     tripleType.getSubTypes().get(2)),
   family.nulls());
 // Move every triple into the key position so the grouping step can order it.
 PTable<Tuple3<V1, V2, V3>, Void> keyed = collection.parallelDo(
   new DoFn<Tuple3<V1, V2, V3>, Pair<Tuple3<V1, V2, V3>, Void>>() {
    @Override
    public void process(Tuple3<V1, V2, V3> triple, Emitter<Pair<Tuple3<V1, V2, V3>, Void>> out) {
     out.emit(Pair.of(triple, (Void) null));
    }
   }, keyedType);
 GroupingOptions sortOptions = buildGroupingOptions(
   collection.getPipeline().getConfiguration(), family, tripleType, columnOrders);
 // Strip the null values again, leaving only the triples.
 return keyed.groupByKey(sortOptions).ungroup().parallelDo(
   new DoFn<Pair<Tuple3<V1, V2, V3>, Void>, Tuple3<V1, V2, V3>>() {
    @Override
    public void process(Pair<Tuple3<V1, V2, V3>, Void> entry, Emitter<Tuple3<V1, V2, V3>> out) {
     out.emit(entry.first());
    }
   }, collection.getPType());
}
origin: cloudera/crunch

Configuration conf = collection.getPipeline().getConfiguration();
GroupingOptions options = buildGroupingOptions(conf, tf, pType, columnOrders);
PTable<Tuple3<V1, V2, V3>, Void> sortedPt = pt.groupByKey(options).ungroup();
origin: org.apache.crunch/crunch

Configuration conf = collection.getPipeline().getConfiguration();
GroupingOptions options = buildGroupingOptions(conf, tf, pType, columnOrders);
PTable<Pair<U, V>, Void> sortedPt = pt.groupByKey(options).ungroup();
origin: org.apache.crunch/crunch-core

/**
 * Orders a {@code PCollection} of tuples according to the supplied column
 * orderings, using a client-specified number of reducers.
 * <p>
 * The collection is keyed through a {@code SortFns.KeyExtraction} built from
 * its {@code PType} and the column orderings, grouped by that key, ungrouped,
 * and reduced back to its values.
 * 
 * @param collection the tuples to sort
 * @param numReducers the reducer count handed to {@code buildGroupingOptions}
 * @param columnOrders the column orderings to sort by
 * @return a {@code PCollection} representing the sorted collection.
 */
public static <T extends Tuple> PCollection<T> sortTuples(PCollection<T> collection, int numReducers,
  ColumnOrder... columnOrders) {
 PType<T> tupleType = collection.getPType();
 SortFns.KeyExtraction<T> extraction = new SortFns.KeyExtraction<T>(tupleType, columnOrders);
 PTable<Object, T> keyed = collection.by(extraction.getByFn(), extraction.getKeyType());
 GroupingOptions sortOptions = buildGroupingOptions(
   keyed, collection.getPipeline().getConfiguration(), numReducers, columnOrders);
 PTable<Object, T> sorted = keyed.groupByKey(sortOptions).ungroup();
 return sorted.values();
}
origin: cloudera/crunch

Configuration conf = collection.getPipeline().getConfiguration();
GroupingOptions options = buildGroupingOptions(conf, tf, pType, columnOrders);
PTable<Pair<U, V>, Void> sortedPt = pt.groupByKey(options).ungroup();
origin: org.apache.crunch/crunch-core

/**
 * Orders the elements of a {@code PCollection} by their natural ordering, in
 * the requested direction, using the given number of reducers.
 * <p>
 * Each element is emitted as the key of a table with {@code null} values by a
 * stage named {@code "sort-pre"}; that table is grouped by key, ungrouped, and
 * reduced back to its keys.
 * 
 * @param collection the collection to sort
 * @param numReducers the reducer count handed to {@code buildGroupingOptions}
 * @param order the order handed to {@code buildGroupingOptions}
 * @return a {@code PCollection} representing the sorted collection.
 */
public static <T> PCollection<T> sort(PCollection<T> collection, int numReducers, Order order) {
 PTypeFamily family = collection.getTypeFamily();
 PTableType<T, Void> keyedType = family.tableOf(collection.getPType(), family.nulls());
 // Fetched before the first stage is added, as in the original call order.
 Configuration pipelineConf = collection.getPipeline().getConfiguration();
 PTable<T, Void> keyed = collection.parallelDo(
   "sort-pre",
   new DoFn<T, Pair<T, Void>>() {
    @Override
    public void process(T element, Emitter<Pair<T, Void>> out) {
     out.emit(Pair.of(element, (Void) null));
    }
   },
   keyedType);
 GroupingOptions sortOptions = buildGroupingOptions(keyed, pipelineConf, numReducers, order);
 PTable<T, Void> sorted = keyed.groupByKey(sortOptions).ungroup();
 return sorted.keys();
}
origin: cloudera/crunch

Configuration conf = collection.getPipeline().getConfiguration();
GroupingOptions options = buildGroupingOptions(conf, tf, pType, columnOrders);
PTable<TupleN, Void> sortedPt = pt.groupByKey(options).ungroup();
origin: org.apache.crunch/crunch

/**
 * Orders a {@link PCollection} of {@link TupleN}s according to the supplied
 * column orderings.
 * <p>
 * Each element is emitted as the key of a table with {@code null} values; that
 * table is grouped by key with options built from the pipeline configuration
 * and the column orderings, ungrouped, and the keys are emitted again.
 * 
 * @param collection the tuples to sort
 * @param columnOrders the column orderings handed to {@code buildGroupingOptions}
 * @return a {@link PCollection} representing the sorted collection.
 */
public static PCollection<TupleN> sortTuples(PCollection<TupleN> collection, ColumnOrder... columnOrders) {
 PTypeFamily family = collection.getTypeFamily();
 PType<TupleN> tupleType = collection.getPType();
 PTableType<TupleN, Void> keyedType = family.tableOf(
   family.tuples(tupleType.getSubTypes().toArray(new PType[0])),
   family.nulls());
 // Move every tuple into the key position so the grouping step can order it.
 PTable<TupleN, Void> keyed = collection.parallelDo(
   new DoFn<TupleN, Pair<TupleN, Void>>() {
    @Override
    public void process(TupleN tuple, Emitter<Pair<TupleN, Void>> out) {
     out.emit(Pair.of(tuple, (Void) null));
    }
   }, keyedType);
 GroupingOptions sortOptions = buildGroupingOptions(
   collection.getPipeline().getConfiguration(), family, tupleType, columnOrders);
 // Strip the null values again, leaving only the tuples.
 return keyed.groupByKey(sortOptions).ungroup().parallelDo(
   new DoFn<Pair<TupleN, Void>, TupleN>() {
    @Override
    public void process(Pair<TupleN, Void> entry, Emitter<TupleN> out) {
     out.emit(entry.first());
    }
   }, collection.getPType());
}
origin: cloudera/crunch

/**
 * Orders the elements of a {@link PCollection} by their natural ordering, in
 * the requested direction.
 * <p>
 * A stage named {@code "sort-pre"} emits each element as the key of a table
 * with {@code null} values; the table is grouped by key and ungrouped, and a
 * stage named {@code "sort-post"} emits the keys again.
 * 
 * @param collection the collection to sort
 * @param order the order handed to {@code buildGroupingOptions}
 * @return a {@link PCollection} representing the sorted collection.
 */
public static <T> PCollection<T> sort(PCollection<T> collection, Order order) {
 PTypeFamily family = collection.getTypeFamily();
 PTableType<T, Void> keyedType = family.tableOf(collection.getPType(), family.nulls());
 // Grouping options are built before any stage is added, as in the original.
 GroupingOptions sortOptions = buildGroupingOptions(
   collection.getPipeline().getConfiguration(), family, collection.getPType(), order);
 PTable<T, Void> keyed = collection.parallelDo(
   "sort-pre",
   new DoFn<T, Pair<T, Void>>() {
    @Override
    public void process(T element, Emitter<Pair<T, Void>> out) {
     out.emit(Pair.of(element, (Void) null));
    }
   },
   keyedType);
 return keyed.groupByKey(sortOptions).ungroup().parallelDo(
   "sort-post",
   new DoFn<Pair<T, Void>, T>() {
    @Override
    public void process(Pair<T, Void> entry, Emitter<T> out) {
     out.emit(entry.first());
    }
   },
   collection.getPType());
}

origin: org.apache.crunch/crunch

/**
 * Returns the given {@link PCollection} sorted by the natural ordering of its
 * elements, in the direction given by {@code order}.
 * <p>
 * The work is done in three steps: a {@code "sort-pre"} stage pairs every
 * element with a {@code null} value, the resulting table is grouped by key and
 * ungrouped, and a {@code "sort-post"} stage emits the first component of each
 * pair.
 * 
 * @param collection the collection to sort
 * @param order the order handed to {@code buildGroupingOptions}
 * @return a {@link PCollection} representing the sorted collection.
 */
public static <T> PCollection<T> sort(PCollection<T> collection, Order order) {
 PTypeFamily typeFamily = collection.getTypeFamily();
 PTableType<T, Void> tableType = typeFamily.tableOf(collection.getPType(), typeFamily.nulls());
 GroupingOptions grouping = buildGroupingOptions(
   collection.getPipeline().getConfiguration(), typeFamily, collection.getPType(), order);
 PTable<T, Void> withNullValues = collection.parallelDo("sort-pre", new DoFn<T, Pair<T, Void>>() {
  @Override
  public void process(T value, Emitter<Pair<T, Void>> sink) {
   sink.emit(Pair.of(value, (Void) null));
  }
 }, tableType);
 PTable<T, Void> ordered = withNullValues.groupByKey(grouping).ungroup();
 return ordered.parallelDo("sort-post", new DoFn<Pair<T, Void>, T>() {
  @Override
  public void process(Pair<T, Void> pair, Emitter<T> sink) {
   sink.emit(pair.first());
  }
 }, collection.getPType());
}
org.apache.crunch.PCollection.getPipeline

Javadoc

Returns the Pipeline associated with this PCollection.

Popular methods of PCollection

  • parallelDo
    Applies the given doFn to the elements of this PCollection and returns a new PCollection that is the
  • getPType
    Returns the PType of this PCollection.
  • by
    Apply the given map function to each element of this instance in order to create a PTable.
  • write
    Write the contents of this PCollection to the given Target, using the given Target.WriteMode to hand
  • materialize
    Returns a reference to the data set represented by this PCollection that may be used by the client t
  • getTypeFamily
    Returns the PTypeFamily of this PCollection.
  • count
    Returns a PTable instance that contains the counts of each unique element of this PCollection.
  • aggregate
    Returns a PCollection that contains the result of aggregating all values in this instance.
  • asReadable
  • cache
    Marks this data as cached using the given CachingOptions. Cached PCollections will only be processed
  • filter
    Apply the given filter function to this instance and return the resulting PCollection.
  • first
  • filter,
  • first,
  • getName,
  • getSize,
  • union

Popular in Java

  • Reading from database using SQL prepared statement
  • onRequestPermissionsResult (Fragment)
  • putExtra (Intent)
  • scheduleAtFixedRate (ScheduledExecutorService)
  • VirtualMachine (com.sun.tools.attach)
    A Java virtual machine. A VirtualMachine represents a Java virtual machine to which this Java vir
  • Collections (java.util)
    This class consists exclusively of static methods that operate on or return collections. It contains
  • Servlet (javax.servlet)
    Defines methods that all servlets must implement. A servlet is a small Java program that runs within
  • JList (javax.swing)
  • Runner (org.openjdk.jmh.runner)
  • LoggerFactory (org.slf4j)
    The LoggerFactory is a utility class producing Loggers for various logging APIs, most notably for lo
  • Top PhpStorm plugins
Tabnine Logo
  • Products

    Search for Java code | Search for JavaScript code
  • IDE Plugins

    IntelliJ IDEA, WebStorm, Visual Studio, Android Studio, Eclipse, Visual Studio Code, PyCharm, Sublime Text, PhpStorm, Vim, GoLand, RubyMine, Emacs, Jupyter Notebook, Jupyter Lab, Rider, DataGrip, AppCode
  • Company

    About Us | Contact Us | Careers
  • Resources

    FAQ | Blog | Tabnine Academy | Terms of use | Privacy policy | Java Code Index | Javascript Code Index
Get Tabnine for your IDE now