Tabnine Logo
PCollection.getPType
Code Index · Add Tabnine to your IDE (free)

How to use
getPType
method
in
org.apache.crunch.PCollection

Best Java code snippets using org.apache.crunch.PCollection.getPType (Showing top 20 results out of 315)

origin: cloudera/crunch

/**
 * Builds a new in-memory collection from the materialized elements of every
 * argument, followed by everything in this instance's {@code collect}.
 *
 * @param collections the collections to merge with this one; may be empty
 * @return a {@code MemCollection} typed with the first argument's PType, or
 *         with this collection's own PType when no arguments are supplied
 */
@Override
public PCollection<S> union(PCollection<S>... collections) {
 Collection<S> output = Lists.newArrayList();
 for (PCollection<S> pcollect : collections) {
  for (S s : pcollect.materialize()) {
   output.add(s);
  }
 }
 output.addAll(collect);
 // A call with zero arguments has no collections[0]; indexing it would throw
 // ArrayIndexOutOfBoundsException, so fall back to this collection's own PType.
 return new MemCollection<S>(output,
   collections.length == 0 ? getPType() : collections[0].getPType());
}
origin: org.apache.crunch/crunch-core

/**
 * Concatenates the materialized contents of the given collections, in list
 * order, into a single in-memory collection.
 *
 * @param collections the collections to merge; element 0 is read for its
 *        PType, so the list must not be empty
 * @return a {@code MemCollection} holding every element, typed with the PType
 *         of the first collection
 */
@Override
public <S> PCollection<S> union(List<PCollection<S>> collections) {
 List<S> merged = Lists.newArrayList();
 for (PCollection<S> source : collections) {
  for (S element : source.materialize()) {
   merged.add(element);
  }
 }
 return new MemCollection<S>(merged, collections.get(0).getPType());
}
origin: org.apache.crunch/crunch

/**
 * Merges the given collections with this one into a new in-memory collection.
 * Elements of the arguments come first (in argument order), then the contents
 * of this instance's {@code collect}.
 *
 * @param collections zero or more collections to merge with this one
 * @return a {@code MemCollection} over the merged elements; its PType is taken
 *         from the first argument, or from this collection if there is none
 */
@Override
public PCollection<S> union(PCollection<S>... collections) {
 Collection<S> output = Lists.newArrayList();
 for (PCollection<S> pcollect : collections) {
  for (S s : pcollect.materialize()) {
   output.add(s);
  }
 }
 output.addAll(collect);
 if (collections.length == 0) {
  // Nothing to take a PType from: collections[0] would throw
  // ArrayIndexOutOfBoundsException, so reuse this collection's PType.
  return new MemCollection<S>(output, getPType());
 }
 return new MemCollection<S>(output, collections[0].getPType());
}
origin: cloudera/crunch

 /**
  * Runs a {@code SamplerFn} built from {@code seed} and {@code probability}
  * over {@code input}. The stage is named {@code sample(p)}, with the
  * probability formatted to two decimals, and the result keeps the input's PType.
  *
  * @param input the collection to sample from
  * @param seed the seed handed to the {@code SamplerFn}
  * @param probability the probability handed to the {@code SamplerFn}
  * @return the collection produced by the sampling stage
  */
 public static <S> PCollection<S> sample(PCollection<S> input, long seed, double probability) {
  return input.parallelDo(
    String.format("sample(%.2f)", probability),
    new SamplerFn<S>(seed, probability),
    input.getPType());
 }
}
origin: org.apache.crunch/crunch-core

/**
 * Samples records from the given {@code PCollection} with an explicit seed,
 * which is useful for unit testing.
 *
 * @param input The {@code PCollection} to sample from
 * @param seed The seed for the random number generator
 * @param probability The sampling probability (0.0 &lt; p &lt; 1.0)
 * @return The {@code PCollection} produced by the sampling stage, with the
 *         same PType as {@code input}
 */
public static <S> PCollection<S> sample(PCollection<S> input, Long seed, double probability) {
 final String label = String.format("sample(%.2f)", probability);
 final SampleFn<S> sampler = new SampleFn<S>(probability, seed);
 return input.parallelDo(label, sampler, input.getPType());
}

origin: org.apache.crunch/crunch

/**
 * Creates a {@code UnionCollection} over this collection and the given ones.
 * Each argument is first passed through an identity {@code parallelDo} that
 * keeps its PType, and the result is cast to {@code PCollectionImpl}.
 *
 * @param collections the collections to union with this one
 * @return the union, with this collection as its first member
 */
@Override
public PCollection<S> union(PCollection<S>... collections) {
 List<PCollectionImpl<S>> members = Lists.newArrayList();
 members.add(this);
 for (int i = 0; i < collections.length; i++) {
  PCollection<S> other = collections[i];
  members.add(
    (PCollectionImpl<S>) other.parallelDo(IdentityFn.<S>getInstance(), other.getPType()));
 }
 return new UnionCollection<S>(members);
}
origin: org.apache.crunch/crunch-core

/**
 * Splits a {@link PCollection} of {@link Pair}s into a {@code Pair} of
 * {@code PCollection}s, so that the output of a DoFn can be handled using
 * separate channels. The two element types are taken from sub-types 0 and 1
 * of the input's PType.
 *
 * @param pCollection The {@code PCollection} to split
 * @return the result of the three-argument {@code split} called with those
 *         two sub-types
 */
public static <T, U> Pair<PCollection<T>, PCollection<U>> split(PCollection<Pair<T, U>> pCollection) {
 PType<Pair<T, U>> pairType = pCollection.getPType();
 return split(
   pCollection,
   pairType.getSubTypes().get(0),
   pairType.getSubTypes().get(1));
}
origin: apache/crunch

/**
 * Returns the {@link PTypeFamily} of the underlying collection's PType, i.e.
 * the family describing how elements of this collection may be serialized.
 */
default PTypeFamily ptf() {
  PTypeFamily family = underlying().getPType().getFamily();
  return family;
}
origin: org.apache.crunch/crunch-core

/**
 * Asks the pipeline's factory for a union collection over this collection and
 * the given ones. Every argument is routed through an identity
 * {@code parallelDo} that keeps its PType before it joins the member list.
 *
 * @param collections the collections to union with this one
 * @return the union collection created by the pipeline's factory
 */
@Override
public PCollection<S> union(PCollection<S>... collections) {
 List<PCollectionImpl<S>> parts = Lists.newArrayList();
 parts.add(this);
 for (PCollection<S> candidate : collections) {
  parts.add(
    (PCollectionImpl<S>) candidate.parallelDo(
      IdentityFn.<S>getInstance(),
      candidate.getPType()));
 }
 return pipeline.getFactory().createUnionCollection(parts);
}
origin: org.apache.crunch/crunch-core

/**
 * Re-types a {@code PCollection<Pair<K, V>>} as a {@code PTable<K, V>}.
 * The table type is built by the pair PType's own family from sub-types 0
 * and 1, and the data passes through an identity function in a stage named
 * {@code "asPTable"}.
 *
 * @param pcollect The {@code PCollection} to convert
 * @return A {@code PTable} that contains the same data as the input {@code PCollection}
 */
public static <K, V> PTable<K, V> asPTable(PCollection<Pair<K, V>> pcollect) {
 PType<Pair<K, V>> pairType = pcollect.getPType();
 PTableType<K, V> tableType = pairType.getFamily()
   .tableOf(pairType.getSubTypes().get(0), pairType.getSubTypes().get(1));
 DoFn<Pair<K, V>, Pair<K, V>> identity = IdentityFn.getInstance();
 return pcollect.parallelDo("asPTable", identity, tableType);
}
origin: org.apache.crunch/crunch

/**
 * Converts the given collection of pairs into a {@code PTable<K, V>} by
 * running an identity function over it (stage name {@code "asPTable"}) with a
 * table type derived from the key and value sub-types of the input's PType.
 *
 * @param pcollect The {@code PCollection} to convert
 * @return A {@code PTable} that contains the same data as the input {@code PCollection}
 */
public static <K, V> PTable<K, V> asPTable(PCollection<Pair<K, V>> pcollect) {
 DoFn<Pair<K, V>, Pair<K, V>> passThrough = IdentityFn.getInstance();
 PType<Pair<K, V>> inputType = pcollect.getPType();
 PTypeFamily family = inputType.getFamily();
 PTableType<K, V> outputType =
   family.tableOf(inputType.getSubTypes().get(0), inputType.getSubTypes().get(1));
 return pcollect.parallelDo("asPTable", passThrough, outputType);
}

origin: org.apache.crunch/crunch-core

/**
 * Turns a collection into a table keyed by its elements, pairing every element
 * with {@code Boolean.TRUE}.
 *
 * @param coll the collection whose elements become the table keys
 * @return a table built with the collection's own type family
 */
private static <T> PTable<T, Boolean> toTable(PCollection<T> coll) {
 PTypeFamily family = coll.getTypeFamily();
 DoFn<T, Pair<T, Boolean>> tagWithTrue = new DoFn<T, Pair<T, Boolean>>() {
  @Override
  public void process(T input, Emitter<Pair<T, Boolean>> emitter) {
   emitter.emit(Pair.of(input, Boolean.TRUE));
  }
 };
 return coll.parallelDo(tagWithTrue, family.tableOf(coll.getPType(), family.booleans()));
}
origin: cloudera/crunch

/**
 * Registers the given collection as an output for {@code target} via
 * {@code addOutput}. A grouped table is ungrouped first; a union collection or
 * union table is first wrapped in an identity stage named
 * {@code "UnionCollectionWrapper"}.
 *
 * @param pcollection the collection to write; must be a {@code PCollectionImpl}
 *        after the adjustments above, since it is cast to one
 * @param target the target to write to
 */
@SuppressWarnings("unchecked")
public void write(PCollection<?> pcollection, Target target) {
 final boolean grouped = pcollection instanceof PGroupedTableImpl;
 final boolean unioned =
   pcollection instanceof UnionCollection || pcollection instanceof UnionTable;
 if (grouped) {
  pcollection = ((PGroupedTableImpl<?, ?>) pcollection).ungroup();
 } else if (unioned) {
  pcollection = pcollection.parallelDo(
    "UnionCollectionWrapper",
    (MapFn) IdentityFn.<Object> getInstance(),
    pcollection.getPType());
 }
 addOutput((PCollectionImpl<?>) pcollection, target);
}
origin: cloudera/crunch

/**
 * Maps each element of {@code coll} to the pair (element, {@code Boolean.TRUE})
 * and returns the result as a table typed by the collection's type family.
 *
 * @param coll the source collection
 * @return a table whose keys are the source elements and whose values are all
 *         {@code Boolean.TRUE}
 */
private static <T> PTable<T, Boolean> toTable(PCollection<T> coll) {
 PTypeFamily ptf = coll.getTypeFamily();
 DoFn<T, Pair<T, Boolean>> keyedByElement = new DoFn<T, Pair<T, Boolean>>() {
  @Override
  public void process(T element, Emitter<Pair<T, Boolean>> out) {
   Pair<T, Boolean> tagged = Pair.of(element, Boolean.TRUE);
   out.emit(tagged);
  }
 };
 return coll.parallelDo(keyedByElement, ptf.tableOf(coll.getPType(), ptf.booleans()));
}
origin: org.apache.crunch/crunch

/**
 * Builds a table from a collection by emitting, for every element, a pair of
 * that element and {@code Boolean.TRUE}.
 *
 * @param coll the collection to convert
 * @return the resulting table, typed as (element PType, booleans) by the
 *         collection's type family
 */
private static <T> PTable<T, Boolean> toTable(PCollection<T> coll) {
 PTypeFamily tf = coll.getTypeFamily();
 DoFn<T, Pair<T, Boolean>> emitTrue = new DoFn<T, Pair<T, Boolean>>() {
  @Override
  public void process(T value, Emitter<Pair<T, Boolean>> sink) {
   sink.emit(Pair.of(value, Boolean.TRUE));
  }
 };
 return coll.parallelDo(
   emitTrue,
   tf.tableOf(coll.getPType(), tf.booleans()));
}
origin: org.apache.crunch/crunch-core

/**
 * Writes every element of the collection to a sequence file at {@code path}.
 * Each record has a {@code NullWritable} key and a value produced by the
 * PType's output map function; the value class comes from the PType's converter.
 *
 * @param fs the file system to write to; its configuration is used for the writer
 * @param path the location of the sequence file
 * @param collection the collection whose materialized elements are written
 * @throws IOException if creating the writer, appending a record, or closing fails
 */
private void writeSequenceFileFromPCollection(final FileSystem fs, final Path path,
  final PCollection collection) throws IOException {
 final PType pType = collection.getPType();
 final Converter converter = pType.getConverter();
 final Class valueClass = converter.getValueClass();
 final SequenceFile.Writer writer = new SequenceFile.Writer(fs, fs.getConf(), path,
   NullWritable.class, valueClass);
 try {
  for (final Object o : collection.materialize()) {
   final Object value = pType.getOutputMapFn().map(o);
   writer.append(NullWritable.get(), value);
  }
 } finally {
  // Close even when materialize/map/append throws, so the writer is not leaked.
  writer.close();
 }
}
origin: cloudera/crunch

/**
 * Writes the collection as a text file at {@code pathName}. The collection is
 * first passed through an identity stage named {@code "asText"} whose output
 * type is the writable-family form of its PType.
 *
 * @param pcollection the collection to write
 * @param pathName the path of the text file target
 */
@Override
public <T> void writeTextFile(PCollection<T> pcollection, String pathName) {
 // Make sure we are handing a writable pcollection instance to write().
 PCollection<T> writable = pcollection.parallelDo(
   "asText",
   IdentityFn.<T> getInstance(),
   WritableTypeFamily.getInstance().as(pcollection.getPType()));
 write(writable, At.textFile(pathName));
}
origin: org.apache.crunch/crunch-core

 /**
  * Aggregates all values of a collection with the given aggregator. Every
  * element is keyed with {@code false}, the table is grouped by key with the
  * argument {@code 1}, the aggregator combines the grouped values, and the
  * values of the result are returned.
  *
  * @param collect the collection to aggregate
  * @param aggregator the aggregator applied to the grouped values
  * @return the values produced by the aggregator
  */
 public static <S> PCollection<S> aggregate(PCollection<S> collect, Aggregator<S> aggregator) {
  PTypeFamily family = collect.getTypeFamily();
  MapFn<S, Pair<Boolean, S>> keyWithFalse = new MapFn<S, Pair<Boolean, S>>() {
   public Pair<Boolean, S> map(S input) {
    return Pair.of(false, input);
   }
  };
  return collect
    .parallelDo("Aggregate.aggregator", keyWithFalse,
      family.tableOf(family.booleans(), collect.getPType()))
    .groupByKey(1)
    .combineValues(aggregator)
    .values();
 }
}
origin: org.apache.crunch/crunch

/**
 * Counts occurrences of each distinct element: every element is mapped to the
 * pair (element, {@code 1L}), the pairs are grouped by key, and the values are
 * combined with {@code Aggregators.SUM_LONGS()}.
 *
 * @param collect the collection whose elements are counted
 * @return a {@code PTable} mapping each unique element to its occurrence count
 */
public static <S> PTable<S, Long> count(PCollection<S> collect) {
 PTypeFamily family = collect.getTypeFamily();
 MapFn<S, Pair<S, Long>> pairWithOne = new MapFn<S, Pair<S, Long>>() {
  public Pair<S, Long> map(S element) {
   return Pair.of(element, 1L);
  }
 };
 return collect
   .parallelDo("Aggregate.count", pairWithOne,
     family.tableOf(collect.getPType(), family.longs()))
   .groupByKey()
   .combineValues(Aggregators.SUM_LONGS());
}
origin: kite-sdk/kite

/**
 * Keys the collection by its own elements (with Avro null values), groups by
 * key, then ungroups and returns the keys.
 *
 * @param collection the collection to process
 * @param numReducers passed to {@code groupByKey} when positive; otherwise the
 *        no-argument {@code groupByKey} is used
 * @return the keys of the ungrouped table
 */
private static <E> PCollection<E> partition(PCollection<E> collection,
                      int numReducers) {
 PType<E> elementType = collection.getPType();
 PTableType<E, Void> keyedType = Avros.tableOf(elementType, Avros.nulls());
 PTable<E, Void> keyed = collection.parallelDo(new AsKeyTable<E>(), keyedType);
 PGroupedTable<E, Void> grouped;
 if (numReducers <= 0) {
  grouped = keyed.groupByKey();
 } else {
  grouped = keyed.groupByKey(numReducers);
 }
 return grouped.ungroup().keys();
}
org.apache.crunch.PCollection.getPType

Javadoc

Returns the PType of this PCollection.

Popular methods of PCollection

  • parallelDo
    Applies the given doFn to the elements of this PCollection and returns a new PCollection that is the
  • by
    Apply the given map function to each element of this instance in order to create a PTable.
  • write
    Write the contents of this PCollection to the given Target, using the given Target.WriteMode to hand
  • materialize
    Returns a reference to the data set represented by this PCollection that may be used by the client t
  • getPipeline
    Returns the Pipeline associated with this PCollection.
  • getTypeFamily
    Returns the PTypeFamily of this PCollection.
  • count
    Returns a PTable instance that contains the counts of each unique element of this PCollection.
  • aggregate
    Returns a PCollection that contains the result of aggregating all values in this instance.
  • asReadable
  • cache
    Marks this data as cached using the given CachingOptions. Cached PCollections will only be processed
  • filter
    Apply the given filter function to this instance and return the resulting PCollection.
  • first
  • filter,
  • first,
  • getName,
  • getSize,
  • union

Popular in Java

  • Reactive rest calls using spring rest template
  • getSupportFragmentManager (FragmentActivity)
  • runOnUiThread (Activity)
  • addToBackStack (FragmentTransaction)
  • Rectangle (java.awt)
    A Rectangle specifies an area in a coordinate space that is enclosed by the Rectangle object's top-
  • HttpURLConnection (java.net)
    A URLConnection for HTTP (RFC 2616 [http://tools.ietf.org/html/rfc2616]) used to send and receive d
  • PriorityQueue (java.util)
    A PriorityQueue holds elements on a priority heap, which orders the elements according to their natu
  • Set (java.util)
    A Set is a data structure which does not allow duplicate elements.
  • TimerTask (java.util)
    The TimerTask class represents a task to run at a specified time. The task may be run once or repeat
  • ThreadPoolExecutor (java.util.concurrent)
    An ExecutorService that executes each submitted task using one of possibly several pooled threads, n
  • Best plugins for Eclipse
Tabnine Logo
  • Products

    Search for Java code · Search for JavaScript code
  • IDE Plugins

    IntelliJ IDEA, WebStorm, Visual Studio, Android Studio, Eclipse, Visual Studio Code, PyCharm, Sublime Text, PhpStorm, Vim, GoLand, RubyMine, Emacs, Jupyter Notebook, Jupyter Lab, Rider, DataGrip, AppCode
  • Company

    About Us · Contact Us · Careers
  • Resources

    FAQ · Blog · Tabnine Academy · Terms of use · Privacy policy · Java Code Index · Javascript Code Index
Get Tabnine for your IDE now