/**
 * Convenience overload that looks up the {@link Configuration} from the
 * pipeline owning {@code pcollection} and delegates to the two-argument
 * {@code getRecommendedPartitions}.
 *
 * @param pcollection the collection whose pipeline configuration is used
 * @return whatever the two-argument overload returns for this collection and
 *         its pipeline's configuration
 */
public static <T> int getRecommendedPartitions(PCollection<T> pcollection) {
  return getRecommendedPartitions(pcollection, pcollection.getPipeline().getConfiguration());
}
/**
 * Builds a {@link PObject} wrapping the first value of the filter table
 * created from {@code collection} with {@code filterFn}.
 *
 * <p>Side effect: before the table is created, the collection's name is stored
 * in the pipeline configuration under {@link BloomFilterFn#CRUNCH_FILTER_NAME}.
 *
 * @param collection the collection the filter table is created from
 * @param filterFn the function handed to {@code createFilterTable}
 * @return a {@link FirstElementPObject} over the values of the filter table
 */
public static <T> PObject<BloomFilter> createFilter(PCollection<T> collection, BloomFilterFn<T> filterFn) {
  // The name must be written to the configuration before the table is built.
  collection.getPipeline()
      .getConfiguration()
      .set(BloomFilterFn.CRUNCH_FILTER_NAME, collection.getName());

  PObject<BloomFilter> firstFilter =
      new FirstElementPObject<BloomFilter>(createFilterTable(collection, filterFn).values());
  return firstFilter;
}
table.getName().getNameAsString(), regionLocator.getAllRegionLocations()); Path regionLocationFilePath = new Path(((DistributedPipeline) cells.getPipeline()).createTempPath(), "regionLocations" + table.getName().getNameAsString()); writeRegionLocationTable(cells.getPipeline().getConfiguration(), regionLocationFilePath, regionLocationTable);
Configuration conf = cells.getPipeline().getConfiguration(); PTable<C, Void> t = cells.parallelDo( "Pre-partition", splitPoints = getSplitPoints(table); Path partitionFile = new Path(((DistributedPipeline) cells.getPipeline()).createTempPath(), "partition"); writePartitionInfo(conf, partitionFile, splitPoints); GroupingOptions options = GroupingOptions.builder()
Configuration conf = collection.getPipeline().getConfiguration(); GroupingOptions options = buildGroupingOptions(conf, tf, pType, columnOrders); PTable<Tuple4<V1, V2, V3, V4>, Void> sortedPt = pt.groupByKey(options).ungroup();
Configuration conf = cells.getPipeline().getConfiguration(); PTable<C, Void> t = cells.parallelDo( "Pre-partition", splitPoints = getSplitPoints(regionLocator); Path partitionFile = new Path(((DistributedPipeline) cells.getPipeline()).createTempPath(), "partition"); writePartitionInfo(conf, partitionFile, splitPoints); GroupingOptions options = GroupingOptions.builder()
/**
 * Orders a {@link PCollection} of {@link Tuple4} elements according to the
 * supplied column orders.
 *
 * <p>Every element is first emitted as the key of an {@code (element, null)}
 * pair, the resulting table is grouped by key using the options produced by
 * {@code buildGroupingOptions}, and after ungrouping the keys are emitted
 * again as plain elements.
 *
 * @param collection the quads to sort
 * @param columnOrders the column ordering handed to {@code buildGroupingOptions}
 * @return a {@link PCollection} representing the sorted collection.
 */
public static <V1, V2, V3, V4> PCollection<Tuple4<V1, V2, V3, V4>> sortQuads(
    PCollection<Tuple4<V1, V2, V3, V4>> collection, ColumnOrder... columnOrders) {
  PTypeFamily typeFamily = collection.getTypeFamily();
  PType<Tuple4<V1, V2, V3, V4>> quadType = collection.getPType();

  @SuppressWarnings("unchecked")
  PTableType<Tuple4<V1, V2, V3, V4>, Void> keyedType = typeFamily.tableOf(
      typeFamily.quads(
          quadType.getSubTypes().get(0),
          quadType.getSubTypes().get(1),
          quadType.getSubTypes().get(2),
          quadType.getSubTypes().get(3)),
      typeFamily.nulls());

  // Step 1: turn each quad into a (quad, null) pair so it can act as a key.
  DoFn<Tuple4<V1, V2, V3, V4>, Pair<Tuple4<V1, V2, V3, V4>, Void>> toKeyOnlyPair =
      new DoFn<Tuple4<V1, V2, V3, V4>, Pair<Tuple4<V1, V2, V3, V4>, Void>>() {
        @Override
        public void process(Tuple4<V1, V2, V3, V4> quad,
            Emitter<Pair<Tuple4<V1, V2, V3, V4>, Void>> out) {
          out.emit(Pair.of(quad, (Void) null));
        }
      };
  PTable<Tuple4<V1, V2, V3, V4>, Void> keyed = collection.parallelDo(toKeyOnlyPair, keyedType);

  // Step 2: group by key with the column-order-aware grouping options.
  Configuration configuration = collection.getPipeline().getConfiguration();
  GroupingOptions groupingOptions =
      buildGroupingOptions(configuration, typeFamily, quadType, columnOrders);
  PTable<Tuple4<V1, V2, V3, V4>, Void> grouped = keyed.groupByKey(groupingOptions).ungroup();

  // Step 3: drop the null values and hand back the keys.
  DoFn<Pair<Tuple4<V1, V2, V3, V4>, Void>, Tuple4<V1, V2, V3, V4>> toKey =
      new DoFn<Pair<Tuple4<V1, V2, V3, V4>, Void>, Tuple4<V1, V2, V3, V4>>() {
        @Override
        public void process(Pair<Tuple4<V1, V2, V3, V4>, Void> pair,
            Emitter<Tuple4<V1, V2, V3, V4>> out) {
          out.emit(pair.first());
        }
      };
  return grouped.parallelDo(toKey, collection.getPType());
}
/**
 * Orders a {@link PCollection} of {@link Tuple3} elements according to the
 * supplied column orders.
 *
 * <p>Every element is first emitted as the key of an {@code (element, null)}
 * pair, the resulting table is grouped by key using the options produced by
 * {@code buildGroupingOptions}, and after ungrouping the keys are emitted
 * again as plain elements.
 *
 * @param collection the triples to sort
 * @param columnOrders the column ordering handed to {@code buildGroupingOptions}
 * @return a {@link PCollection} representing the sorted collection.
 */
public static <V1, V2, V3> PCollection<Tuple3<V1, V2, V3>> sortTriples(
    PCollection<Tuple3<V1, V2, V3>> collection, ColumnOrder... columnOrders) {
  PTypeFamily typeFamily = collection.getTypeFamily();
  PType<Tuple3<V1, V2, V3>> tripleType = collection.getPType();

  @SuppressWarnings("unchecked")
  PTableType<Tuple3<V1, V2, V3>, Void> keyedType = typeFamily.tableOf(
      typeFamily.triples(
          tripleType.getSubTypes().get(0),
          tripleType.getSubTypes().get(1),
          tripleType.getSubTypes().get(2)),
      typeFamily.nulls());

  // Step 1: turn each triple into a (triple, null) pair so it can act as a key.
  DoFn<Tuple3<V1, V2, V3>, Pair<Tuple3<V1, V2, V3>, Void>> toKeyOnlyPair =
      new DoFn<Tuple3<V1, V2, V3>, Pair<Tuple3<V1, V2, V3>, Void>>() {
        @Override
        public void process(Tuple3<V1, V2, V3> triple,
            Emitter<Pair<Tuple3<V1, V2, V3>, Void>> out) {
          out.emit(Pair.of(triple, (Void) null));
        }
      };
  PTable<Tuple3<V1, V2, V3>, Void> keyed = collection.parallelDo(toKeyOnlyPair, keyedType);

  // Step 2: group by key with the column-order-aware grouping options.
  Configuration configuration = collection.getPipeline().getConfiguration();
  GroupingOptions groupingOptions =
      buildGroupingOptions(configuration, typeFamily, tripleType, columnOrders);
  PTable<Tuple3<V1, V2, V3>, Void> grouped = keyed.groupByKey(groupingOptions).ungroup();

  // Step 3: drop the null values and hand back the keys.
  DoFn<Pair<Tuple3<V1, V2, V3>, Void>, Tuple3<V1, V2, V3>> toKey =
      new DoFn<Pair<Tuple3<V1, V2, V3>, Void>, Tuple3<V1, V2, V3>>() {
        @Override
        public void process(Pair<Tuple3<V1, V2, V3>, Void> pair,
            Emitter<Tuple3<V1, V2, V3>> out) {
          out.emit(pair.first());
        }
      };
  return grouped.parallelDo(toKey, collection.getPType());
}
Configuration conf = collection.getPipeline().getConfiguration(); GroupingOptions options = buildGroupingOptions(conf, tf, pType, columnOrders); PTable<Tuple3<V1, V2, V3>, Void> sortedPt = pt.groupByKey(options).ungroup();
Configuration conf = collection.getPipeline().getConfiguration(); GroupingOptions options = buildGroupingOptions(conf, tf, pType, columnOrders); PTable<Pair<U, V>, Void> sortedPt = pt.groupByKey(options).ungroup();
/**
 * Sorts a {@code PCollection} of {@link Tuple} elements using the specified
 * column ordering and a client-specified number of reducers.
 *
 * <p>A {@code SortFns.KeyExtraction} built from the collection's type and the
 * column orders supplies both the keying function and the key type; the keyed
 * table is grouped with the options from {@code buildGroupingOptions}, and the
 * values of the ungrouped table are returned.
 *
 * @param collection the tuples to sort
 * @param numReducers the reducer count passed to {@code buildGroupingOptions}
 * @param columnOrders the column ordering used for key extraction and grouping
 * @return a {@code PCollection} representing the sorted collection.
 */
public static <T extends Tuple> PCollection<T> sortTuples(PCollection<T> collection, int numReducers,
    ColumnOrder... columnOrders) {
  PType<T> tupleType = collection.getPType();
  SortFns.KeyExtraction<T> extraction = new SortFns.KeyExtraction<T>(tupleType, columnOrders);
  PTable<Object, T> keyed = collection.by(extraction.getByFn(), extraction.getKeyType());

  Configuration configuration = collection.getPipeline().getConfiguration();
  GroupingOptions groupingOptions =
      buildGroupingOptions(keyed, configuration, numReducers, columnOrders);

  PTable<Object, T> grouped = keyed.groupByKey(groupingOptions).ungroup();
  return grouped.values();
}
Configuration conf = collection.getPipeline().getConfiguration(); GroupingOptions options = buildGroupingOptions(conf, tf, pType, columnOrders); PTable<Pair<U, V>, Void> sortedPt = pt.groupByKey(options).ungroup();
/**
 * Sorts the {@code PCollection} using the natural ordering of its elements in
 * the order specified, using the given number of reducers.
 *
 * <p>Elements are emitted as keys of {@code (element, null)} pairs by a stage
 * named {@code "sort-pre"}, grouped with the options from
 * {@code buildGroupingOptions}, and the keys of the ungrouped table are
 * returned.
 *
 * @param collection the collection to sort
 * @param numReducers the reducer count passed to {@code buildGroupingOptions}
 * @param order the sort order passed to {@code buildGroupingOptions}
 * @return a {@code PCollection} representing the sorted collection.
 */
public static <T> PCollection<T> sort(PCollection<T> collection, int numReducers, Order order) {
  PTypeFamily typeFamily = collection.getTypeFamily();
  PTableType<T, Void> keyedType = typeFamily.tableOf(collection.getPType(), typeFamily.nulls());
  Configuration configuration = collection.getPipeline().getConfiguration();

  // Wrap each element as a key with a null value so groupByKey can order it.
  DoFn<T, Pair<T, Void>> toKeyOnlyPair = new DoFn<T, Pair<T, Void>>() {
    @Override
    public void process(T element, Emitter<Pair<T, Void>> out) {
      out.emit(Pair.of(element, (Void) null));
    }
  };
  PTable<T, Void> keyed = collection.parallelDo("sort-pre", toKeyOnlyPair, keyedType);

  GroupingOptions groupingOptions = buildGroupingOptions(keyed, configuration, numReducers, order);
  PTable<T, Void> grouped = keyed.groupByKey(groupingOptions).ungroup();
  return grouped.keys();
}
Configuration conf = collection.getPipeline().getConfiguration(); GroupingOptions options = buildGroupingOptions(conf, tf, pType, columnOrders); PTable<TupleN, Void> sortedPt = pt.groupByKey(options).ungroup();
/**
 * Orders a {@link PCollection} of {@link TupleN} elements according to the
 * supplied column orders.
 *
 * <p>Every element is first emitted as the key of an {@code (element, null)}
 * pair, the resulting table is grouped by key using the options produced by
 * {@code buildGroupingOptions}, and after ungrouping the keys are emitted
 * again as plain elements.
 *
 * @param collection the tuples to sort
 * @param columnOrders the column ordering handed to {@code buildGroupingOptions}
 * @return a {@link PCollection} representing the sorted collection.
 */
public static PCollection<TupleN> sortTuples(PCollection<TupleN> collection,
    ColumnOrder... columnOrders) {
  PTypeFamily typeFamily = collection.getTypeFamily();
  PType<TupleN> tupleType = collection.getPType();

  // The table type is rebuilt from the tuple's own sub-types, paired with nulls.
  PType[] subTypes = tupleType.getSubTypes().toArray(new PType[0]);
  PTableType<TupleN, Void> keyedType =
      typeFamily.tableOf(typeFamily.tuples(subTypes), typeFamily.nulls());

  // Step 1: turn each tuple into a (tuple, null) pair so it can act as a key.
  DoFn<TupleN, Pair<TupleN, Void>> toKeyOnlyPair = new DoFn<TupleN, Pair<TupleN, Void>>() {
    @Override
    public void process(TupleN tuple, Emitter<Pair<TupleN, Void>> out) {
      out.emit(Pair.of(tuple, (Void) null));
    }
  };
  PTable<TupleN, Void> keyed = collection.parallelDo(toKeyOnlyPair, keyedType);

  // Step 2: group by key with the column-order-aware grouping options.
  Configuration configuration = collection.getPipeline().getConfiguration();
  GroupingOptions groupingOptions =
      buildGroupingOptions(configuration, typeFamily, tupleType, columnOrders);
  PTable<TupleN, Void> grouped = keyed.groupByKey(groupingOptions).ungroup();

  // Step 3: drop the null values and hand back the keys.
  DoFn<Pair<TupleN, Void>, TupleN> toKey = new DoFn<Pair<TupleN, Void>, TupleN>() {
    @Override
    public void process(Pair<TupleN, Void> pair, Emitter<TupleN> out) {
      out.emit(pair.first());
    }
  };
  return grouped.parallelDo(toKey, collection.getPType());
}
/**
 * Sorts the {@link PCollection} using the natural ordering of its elements
 * in the order specified.
 *
 * <p>The grouping options are built first; elements are then emitted as keys
 * of {@code (element, null)} pairs by a stage named {@code "sort-pre"},
 * grouped and ungrouped, and finally unwrapped again by a stage named
 * {@code "sort-post"}.
 *
 * @param collection the collection to sort
 * @param order the sort order passed to {@code buildGroupingOptions}
 * @return a {@link PCollection} representing the sorted collection.
 */
public static <T> PCollection<T> sort(PCollection<T> collection, Order order) {
  PTypeFamily typeFamily = collection.getTypeFamily();
  PTableType<T, Void> keyedType = typeFamily.tableOf(collection.getPType(), typeFamily.nulls());
  Configuration configuration = collection.getPipeline().getConfiguration();
  GroupingOptions groupingOptions =
      buildGroupingOptions(configuration, typeFamily, collection.getPType(), order);

  // Wrap each element as a key with a null value so groupByKey can order it.
  DoFn<T, Pair<T, Void>> toKeyOnlyPair = new DoFn<T, Pair<T, Void>>() {
    @Override
    public void process(T element, Emitter<Pair<T, Void>> out) {
      out.emit(Pair.of(element, (Void) null));
    }
  };
  PTable<T, Void> keyed = collection.parallelDo("sort-pre", toKeyOnlyPair, keyedType);

  PTable<T, Void> grouped = keyed.groupByKey(groupingOptions).ungroup();

  // Strip the null values so callers get the elements back.
  DoFn<Pair<T, Void>, T> toKey = new DoFn<Pair<T, Void>, T>() {
    @Override
    public void process(Pair<T, Void> pair, Emitter<T> out) {
      out.emit(pair.first());
    }
  };
  return grouped.parallelDo("sort-post", toKey, collection.getPType());
}
/**
 * Sorts the {@link PCollection} using the natural ordering of its elements in
 * the order specified.
 *
 * <p>Pipeline shape: build the grouping options, run a {@code "sort-pre"}
 * stage that emits each element as the key of an {@code (element, null)} pair,
 * group by key and ungroup, then run a {@code "sort-post"} stage that emits
 * the key of each pair.
 *
 * @param collection the collection to sort
 * @param order the sort order passed to {@code buildGroupingOptions}
 * @return a {@link PCollection} representing the sorted collection.
 */
public static <T> PCollection<T> sort(PCollection<T> collection, Order order) {
  PTypeFamily family = collection.getTypeFamily();
  PTableType<T, Void> pairType = family.tableOf(collection.getPType(), family.nulls());
  Configuration pipelineConf = collection.getPipeline().getConfiguration();
  GroupingOptions sortOptions =
      buildGroupingOptions(pipelineConf, family, collection.getPType(), order);

  // Pre-stage: the element becomes the key; the value is always null.
  DoFn<T, Pair<T, Void>> wrapFn = new DoFn<T, Pair<T, Void>>() {
    @Override
    public void process(T value, Emitter<Pair<T, Void>> sink) {
      sink.emit(Pair.of(value, (Void) null));
    }
  };
  PTable<T, Void> wrapped = collection.parallelDo("sort-pre", wrapFn, pairType);

  PTable<T, Void> ordered = wrapped.groupByKey(sortOptions).ungroup();

  // Post-stage: discard the null value and emit the key.
  DoFn<Pair<T, Void>, T> unwrapFn = new DoFn<Pair<T, Void>, T>() {
    @Override
    public void process(Pair<T, Void> entry, Emitter<T> sink) {
      sink.emit(entry.first());
    }
  };
  return ordered.parallelDo("sort-post", unwrapFn, collection.getPType());
}