Tabnine Logo
PCollection.materialize
Code Index | Add Tabnine to your IDE (free)

How to use
materialize
method
in
org.apache.crunch.PCollection

Best Java code snippets using org.apache.crunch.PCollection.materialize (Showing top 19 results out of 315)

origin: cloudera/crunch

/**
 * Materializes the given collection by delegating to the collection itself.
 *
 * @param pcollection the collection whose contents are requested
 * @return whatever {@code pcollection.materialize()} returns
 */
@Override
public <T> Iterable<T> materialize(PCollection<T> pcollection) {
 final Iterable<T> contents = pcollection.materialize();
 return contents;
}
origin: org.apache.crunch/crunch-core

/**
 * Hands the materialization request straight to the supplied collection.
 *
 * @param pcollection the collection to materialize
 * @return the iterable obtained from {@code pcollection.materialize()}
 */
@Override
public <T> Iterable<T> materialize(PCollection<T> pcollection) {
 final Iterable<T> materialized = pcollection.materialize();
 return materialized;
}
origin: org.apache.crunch/crunch

/**
 * Returns the result of asking the given collection to materialize itself.
 *
 * @param pcollection the collection to read
 * @return the value of {@code pcollection.materialize()}
 */
@Override
public <T> Iterable<T> materialize(PCollection<T> pcollection) {
 final Iterable<T> result = pcollection.materialize();
 return result;
}
origin: org.apache.crunch/crunch-core

/**
 * Constructs a new instance of this {@code PObject} implementation.
 *
 * @param collect The backing {@code PCollection} for this {@code PObject}.
 */
public PObjectImpl(PCollection<S> collect) {
 this.name = collect.toString();
 this.iterable = collect.materialize();
 this.cachedValue = null;
 this.isCached = false;
}
origin: org.apache.crunch/crunch

/**
 * {@inheritDoc}
 *
 * <p>The first call runs {@code process} over the materialized collection and
 * stores the result; later calls return the stored value.
 */
@Override
public final T getValue() {
 if (isCached) {
  return cachedValue;
 }
 // First access: compute once, then remember that we did.
 cachedValue = process(collection.materialize());
 isCached = true;
 return cachedValue;
}
origin: apache/crunch

/**
 * Exposes the contents of this LCollection as a sequential {@link Stream} for local processing.
 * Note, this may trigger your job to execute in a distributed environment if the pipeline has
 * not yet been run.
 *
 * @return a non-parallel stream over the materialized contents of the underlying collection
 */
default Stream<S> materialize() {
  final Iterable<S> localData = underlying().materialize();
  final boolean parallel = false;
  return StreamSupport.stream(localData.spliterator(), parallel);
}
origin: org.apache.crunch/crunch-core

/**
 * Builds an in-memory collection holding every element of the given collections,
 * in list order. The result uses the {@code PType} of the first collection.
 *
 * @param collections the collections to combine; the first entry is read via
 *                    {@code collections.get(0)}, so the list must not be empty
 * @return a {@code MemCollection} containing all materialized elements
 */
@Override
public <S> PCollection<S> union(List<PCollection<S>> collections) {
 List<S> merged = Lists.newArrayList();
 for (PCollection<S> member : collections) {
  for (S element : member.materialize()) {
   merged.add(element);
  }
 }
 return new MemCollection<S>(merged, collections.get(0).getPType());
}
origin: cloudera/crunch

/**
 * Combines the given collections with this one into a new in-memory collection.
 * Elements of the arguments come first, in argument order, followed by the
 * elements held in {@code collect}. The result uses the {@code PType} of the
 * first argument.
 *
 * @param collections the collections to merge in; {@code collections[0]} is
 *                    read, so at least one must be supplied
 * @return a {@code MemCollection} with the combined elements
 */
@Override
public PCollection<S> union(PCollection<S>... collections) {
 Collection<S> merged = Lists.newArrayList();
 for (int i = 0; i < collections.length; i++) {
  for (S element : collections[i].materialize()) {
   merged.add(element);
  }
 }
 merged.addAll(collect);
 return new MemCollection<S>(merged, collections[0].getPType());
}
origin: org.apache.crunch/crunch

/**
 * Produces a new in-memory collection made of the materialized elements of
 * each argument (in argument order) followed by this collection's own
 * {@code collect} elements. The {@code PType} is taken from the first argument.
 *
 * @param collections the other collections; {@code collections[0]} is read,
 *                    so at least one must be supplied
 * @return a {@code MemCollection} with the combined elements
 */
@Override
public PCollection<S> union(PCollection<S>... collections) {
 Collection<S> combined = Lists.newArrayList();
 for (int idx = 0; idx < collections.length; idx++) {
  for (S item : collections[idx].materialize()) {
   combined.add(item);
  }
 }
 combined.addAll(collect);
 return new MemCollection<S>(combined, collections[0].getPType());
}
origin: org.apache.crunch/crunch-core

/**
 * Gathers the targets a callable depends on: its declared targets, the target
 * dependencies of each of its PCollections, and the materialized source of
 * each PCollection when that source is itself a {@code Target}.
 *
 * @param callable the callable to inspect
 * @return a new set containing all of those targets
 */
private Set<Target> getDependencies(PipelineCallable<?> callable) {
 Set<Target> dependencies = Sets.newHashSet(callable.getAllTargets().values());
 for (PCollection pcollection : callable.getAllPCollections().values()) {
  dependencies.addAll(((PCollectionImpl) pcollection).getTargetDependencies());
  // The materialized form exposes the source it reads from.
  Source materializedSource = ((MaterializableIterable) pcollection.materialize()).getSource();
  if (materializedSource instanceof Target) {
   dependencies.add((Target) materializedSource);
  }
 }
 return dependencies;
}
origin: org.apache.crunch/crunch-hbase

/**
 * Derives split points from the table's region start keys.
 *
 * <p>The start keys are read and sorted, {@code DetermineAffectedRegionsFn} is
 * run over {@code affectedRows}, and each materialized buffer is turned into a
 * first-on-row {@code KeyValue}. Duplicates are dropped.
 *
 * @param table        the table whose region start keys are read
 * @param affectedRows the rows fed to {@code DetermineAffectedRegionsFn}
 * @return the distinct key values, in the order they were first encountered
 * @throws AssertionError         if the table reports no start keys
 * @throws CrunchRuntimeException if reading the start keys fails with an {@code IOException}
 */
private static <C> List<KeyValue> getSplitPoints(HTable table, PTable<C, Void> affectedRows) throws IOException {
 List<byte[]> regionStarts;
 try {
  regionStarts = Lists.newArrayList(table.getStartKeys());
 } catch (IOException e) {
  throw new CrunchRuntimeException(e);
 }
 if (regionStarts.isEmpty()) {
  throw new AssertionError(table + " has no regions!");
 }
 Collections.sort(regionStarts, Bytes.BYTES_COMPARATOR);

 Iterable<ByteBuffer> affectedStarts = affectedRows
     .parallelDo(new DetermineAffectedRegionsFn(regionStarts), Writables.bytes())
     .materialize();

 // A set builder discards start keys that were emitted more than once.
 ImmutableSet.Builder<KeyValue> uniqueKeys = ImmutableSet.builder();
 for (ByteBuffer start : affectedStarts) {
  uniqueKeys.add(KeyValueUtil.createFirstOnRow(start.array()));
 }
 ImmutableSet<KeyValue> distinct = uniqueKeys.build();
 return ImmutableList.copyOf(distinct);
}
origin: cloudera/crunch

/**
 * Writes the materialized contents of a collection as text to a file named
 * {@code "out"} under the target's path.
 *
 * <p>For a {@code PTable}, each entry is written as
 * {@code first<TAB>second<CR><LF>}; otherwise each element's {@code toString()}
 * is written followed by {@code <CR><LF>}. An {@code IOException} is logged and
 * not rethrown, and a target that is not a {@code PathTarget} is logged as an
 * error and ignored.
 *
 * @param collection the collection whose elements are written
 * @param target     where to write; only {@code PathTarget} instances are handled
 */
@Override
public void write(PCollection<?> collection, Target target) {
 if (target instanceof PathTarget) {
  Path path = ((PathTarget) target).getPath();
  try {
   FileSystem fs = FileSystem.get(conf);
   FSDataOutputStream os = fs.create(new Path(path, "out"));
   try {
    if (collection instanceof PTable) {
     for (Object o : collection.materialize()) {
      Pair p = (Pair) o;
      os.writeBytes(p.first().toString());
      os.writeBytes("\t");
      os.writeBytes(p.second().toString());
      os.writeBytes("\r\n");
     }
    } else {
     for (Object o : collection.materialize()) {
      os.writeBytes(o.toString() + "\r\n");
     }
    }
   } finally {
    // Release the stream even when materializing or writing fails part-way;
    // previously a failure here left the stream open.
    os.close();
   }
  } catch (IOException e) {
   LOG.error("Exception writing target: " + target, e);
  }
 } else {
  LOG.error("Target " + target + " is not a PathTarget instance");
 }
}
origin: apache/crunch

/**
 * Derives split points from the region start keys reported by a region locator.
 *
 * <p>The start keys are read and sorted, {@code DetermineAffectedRegionsFn} is
 * run over {@code affectedRows}, and each materialized buffer is turned into a
 * first-on-row {@code KeyValue}. Duplicates are dropped.
 *
 * @param regionLocator the locator whose start keys are read
 * @param affectedRows  the rows fed to {@code DetermineAffectedRegionsFn}
 * @return the distinct key values, in the order they were first encountered
 * @throws AssertionError         if the locator reports no start keys
 * @throws CrunchRuntimeException if reading the start keys fails with an {@code IOException}
 */
private static <C> List<KeyValue> getSplitPoints(RegionLocator regionLocator, PTable<C, Void> affectedRows) throws IOException {
 List<byte[]> regionStarts;
 try {
  regionStarts = Lists.newArrayList(regionLocator.getStartKeys());
  if (regionStarts.isEmpty()) {
   throw new AssertionError(regionLocator.getName().getNameAsString() + " has no regions!");
  }
 } catch (IOException e) {
  throw new CrunchRuntimeException(e);
 }
 Collections.sort(regionStarts, Bytes.BYTES_COMPARATOR);

 Iterable<ByteBuffer> affectedStarts = affectedRows
     .parallelDo(new DetermineAffectedRegionsFn(regionStarts), Writables.bytes())
     .materialize();

 // A set builder discards start keys that were emitted more than once.
 ImmutableSet.Builder<KeyValue> uniqueKeys = ImmutableSet.builder();
 for (ByteBuffer start : affectedStarts) {
  uniqueKeys.add(KeyValueUtil.createFirstOnRow(start.array()));
 }
 ImmutableSet<KeyValue> distinct = uniqueKeys.build();
 return ImmutableList.copyOf(distinct);
}
origin: org.apache.crunch/crunch-core

if (collection instanceof PTable) {
 byte[] tab = "\t".getBytes(Charsets.UTF_8);
 for (Object o : collection.materialize()) {
  Pair p = (Pair) o;
  os.write(p.first().toString().getBytes(Charsets.UTF_8));
 for (Object o : collection.materialize()) {
  os.write(o.toString().getBytes(Charsets.UTF_8));
  os.write(newLine);
origin: org.apache.crunch/crunch

outputIndex++;
if (collection instanceof PTable) {
 for (Object o : collection.materialize()) {
  Pair p = (Pair) o;
  os.writeBytes(p.first().toString());
 for (Object o : collection.materialize()) {
  os.writeBytes(o.toString() + "\r\n");
origin: cloudera/seismichadoop

Iterator<ByteBuffer> iter = result.materialize().iterator();
x.start();
while (iter.hasNext()) {
origin: org.apache.crunch/crunch-core

/**
 * Writes every element of a collection to a sequence file at {@code path}.
 *
 * <p>Each element is passed through the collection type's output map function
 * and appended with a {@code NullWritable} key. The value class comes from the
 * type's converter.
 *
 * @param fs         the file system on which the file is created
 * @param path       the location of the sequence file
 * @param collection the collection whose materialized elements are written
 * @throws IOException if creating, appending to, or closing the writer fails
 */
private void writeSequenceFileFromPCollection(final FileSystem fs, final Path path,
  final PCollection collection) throws IOException {
 final PType pType = collection.getPType();
 final Converter converter = pType.getConverter();
 final Class valueClass = converter.getValueClass();
 final SequenceFile.Writer writer = new SequenceFile.Writer(fs, fs.getConf(), path,
   NullWritable.class, valueClass);
 try {
  for (final Object o : collection.materialize()) {
   final Object value = pType.getOutputMapFn().map(o);
   writer.append(NullWritable.get(), value);
  }
 } finally {
  // Close the writer even if mapping or appending throws, so the underlying
  // file handle is not leaked.
  writer.close();
 }
}
origin: org.apache.crunch/crunch-core

/**
 * Writes every record of a collection to an Avro data file on the given stream.
 *
 * <p>Each record is passed through the Avro type's output map function before
 * being appended. Both the data file writer and {@code outputStream} are closed
 * before this method returns, including when writing fails. The one exception
 * is the missing-type check below, which throws before anything is opened and
 * leaves {@code outputStream} untouched.
 *
 * @param outputStream     the stream the Avro file is written to; closed by this method
 * @param recordCollection the collection whose materialized records are written
 * @throws IllegalStateException if the collection has no {@code PType}
 * @throws IOException           if creating, appending to, or closing the file fails
 */
@SuppressWarnings({ "rawtypes", "unchecked" })
private void writeAvroFile(FSDataOutputStream outputStream, PCollection recordCollection) throws IOException {
 AvroType avroType = (AvroType)recordCollection.getPType();
 if (avroType == null) {
  throw new IllegalStateException("Can't write a non-typed Avro collection");
 }
 try {
  // Reuse the already-cast type instead of fetching and casting it again.
  DatumWriter datumWriter = Avros.newWriter(avroType);
  DataFileWriter dataFileWriter = new DataFileWriter(datumWriter);
  try {
   dataFileWriter.create(avroType.getSchema(), outputStream);
   for (Object record : recordCollection.materialize()) {
    dataFileWriter.append(avroType.getOutputMapFn().map(record));
   }
  } finally {
   dataFileWriter.close();
  }
 } finally {
  // Closed separately so the stream is released even if the writer never
  // took ownership of it (e.g. create() failed).
  outputStream.close();
 }
}
origin: org.apache.crunch/crunch-core

/**
 * Sets the reducer count on the builder and, when more than one reducer is
 * used, configures total-order partitioning from a sample of the table's keys.
 *
 * <p>A non-positive {@code numReducers} is replaced by the value from
 * {@code PartitionUtils.getRecommendedPartitions}, or by 1 when that value is
 * below 5.
 *
 * @param builder     the grouping options builder to configure
 * @param ptable      the table whose keys are sampled
 * @param conf        configuration passed to the partition recommendation
 * @param numReducers requested reducer count; values {@code <= 0} request a recommended count
 */
private static <K, V> void configureReducers(GroupingOptions.Builder builder,
  PTable<K, V> ptable, Configuration conf, int numReducers) {
 if (numReducers <= 0) {
  int recommended = PartitionUtils.getRecommendedPartitions(ptable, conf);
  // Not worth the overhead, force it to 1
  numReducers = (recommended < 5) ? 1 : recommended;
 }
 builder.numReducers(numReducers);
 if (numReducers <= 1) {
  return;
 }

 // Sample one fewer key than there are reducers.
 Iterable<K> sampledKeys = Sample.reservoirSample(ptable.keys(), numReducers - 1).materialize();
 MaterializableIterable<K> sample = (MaterializableIterable<K>) sampledKeys;
 if (sample.isSourceTarget()) {
  builder.sourceTargets((SourceTarget) sample.getSource());
 }
 builder.partitionerClass(TotalOrderPartitioner.class);
 builder.conf(TotalOrderPartitioner.PARTITIONER_PATH, sample.getPath().toString());
 //TODO: distcache handling
}
org.apache.crunch > PCollection > materialize

Javadoc

Returns a reference to the data set represented by this PCollection that may be used by the client to read the data locally.

Popular methods of PCollection

  • parallelDo
    Applies the given doFn to the elements of this PCollection and returns a new PCollection that is the
  • getPType
    Returns the PType of this PCollection.
  • by
    Apply the given map function to each element of this instance in order to create a PTable.
  • write
    Write the contents of this PCollection to the given Target, using the given Target.WriteMode to hand
  • getPipeline
    Returns the Pipeline associated with this PCollection.
  • getTypeFamily
    Returns the PTypeFamily of this PCollection.
  • count
    Returns a PTable instance that contains the counts of each unique element of this PCollection.
  • aggregate
    Returns a PCollection that contains the result of aggregating all values in this instance.
  • asReadable
  • cache
    Marks this data as cached using the given CachingOptions. Cached PCollections will only be processed
  • filter
    Apply the given filter function to this instance and return the resulting PCollection.
  • first
  • filter,
  • first,
  • getName,
  • getSize,
  • union

Popular in Java

  • Reactive rest calls using spring rest template
  • getExternalFilesDir (Context)
  • onCreateOptionsMenu (Activity)
  • getSharedPreferences (Context)
  • Menu (java.awt)
  • Arrays (java.util)
    This class contains various methods for manipulating arrays (such as sorting and searching). This cl
  • PriorityQueue (java.util)
    A PriorityQueue holds elements on a priority heap, which orders the elements according to their natu
  • Handler (java.util.logging)
    A Handler object accepts a logging request and exports the desired messages to a target, for example
  • Notification (javax.management)
  • Get (org.apache.hadoop.hbase.client)
    Used to perform Get operations on a single row. To get everything for a row, instantiate a Get objec
  • Top Vim plugins
Tabnine Logo
  • Products

    Search for Java codeSearch for JavaScript code
  • IDE Plugins

    IntelliJ IDEAWebStormVisual StudioAndroid StudioEclipseVisual Studio CodePyCharmSublime TextPhpStormVimGoLandRubyMineEmacsJupyter NotebookJupyter LabRiderDataGripAppCode
  • Company

    About UsContact UsCareers
  • Resources

    FAQBlogTabnine AcademyTerms of usePrivacy policyJava Code IndexJavascript Code Index
Get Tabnine for your IDE now