/**
 * Builds a new {@link HadoopSpillableTupleMap} using this factory's configured
 * capacity, load factor, and spill thresholds.
 *
 * @param flowProcess the active flow process, handed to the map so it can resolve
 *                    serialization and spill settings at runtime
 * @return a fresh map from grouping {@code Tuple} to its collection of value tuples
 */
@Override
public Map<Tuple, Collection<Tuple>> create( FlowProcess<? extends Configuration> flowProcess )
  {
  HadoopSpillableTupleMap tupleMap = new HadoopSpillableTupleMap( capacity, loadFactor, mapThreshold, listThreshold, flowProcess );

  return tupleMap;
  }
}
/**
 * Returns the current per-list spill threshold, scaled inversely with the number of
 * keys in the map so the total number of in-memory tuples stays near the map
 * threshold, clamped between {@code minThreshold} and the initial list threshold.
 *
 * @return the spill threshold to apply to each grouping's tuple list
 */
int current()
  {
  // Guard against divide-by-zero: size() is 0 before the first key is inserted.
  // NOTE(review): callers may already guarantee a non-empty map — confirm; the
  // guard is harmless either way since an empty map has no lists to spill.
  int keyCount = Math.max( size(), 1 );

  return Math.max( minThreshold, Math.min( getInitListThreshold(), getMapThreshold() / keyCount ) );
  }
private void performMapTest( int numKeys, int listSize, int mapThreshold, int listThreshold, Configuration jobConf ) { jobConf.set( "io.serializations", TestSerialization.class.getName() + "," + WritableSerialization.class.getName() ); // disable/replace WritableSerialization class jobConf.set( "cascading.serialization.tokens", "1000=" + BooleanWritable.class.getName() + ",10001=" + Text.class.getName() ); // not using Text, just testing parsing HadoopFlowProcess flowProcess = new HadoopFlowProcess( jobConf ); HadoopSpillableTupleMap map = new HadoopSpillableTupleMap( SpillableProps.defaultMapInitialCapacity, SpillableProps.defaultMapLoadFactor, mapThreshold, listThreshold, flowProcess ); Set<Integer> keySet = new HashSet<Integer>(); Random gen = new Random( 1 ); for( int i = 0; i < listSize * numKeys; i++ ) { String aString = "string number " + i; double random = Math.random(); double keys = numKeys / 3.0; int key = (int) ( gen.nextDouble() * keys + gen.nextDouble() * keys + gen.nextDouble() * keys ); Tuple tuple = new Tuple( i, aString, random, new Text( aString ), new TestText( aString ), new Tuple( "inner tuple", new BytesWritable( aString.getBytes() ) ) ); map.get( new Tuple( key ) ).add( tuple ); keySet.add( key ); } // the list test above verifies the contents are being serialized, the Map is just a container of lists. assertEquals( "not equal: map.size();", keySet.size(), map.size() ); } }
/**
 * Constructs a spillable tuple map whose per-key collections are produced by a
 * {@code TupleMapFactory} resolved from the flow process, defaulting to
 * {@link HadoopTupleCollectionFactory}.
 *
 * @param initialCapacity initial capacity of the backing map
 * @param loadFactor      load factor of the backing map
 * @param mapThreshold    key count at which the map begins spilling
 * @param listThreshold   tuple count at which an individual list spills
 * @param flowProcess     current flow process, retained for collection creation
 */
public HadoopSpillableTupleMap( int initialCapacity, float loadFactor, int mapThreshold, int listThreshold, FlowProcess<? extends Configuration> flowProcess )
  {
  super( initialCapacity, loadFactor, mapThreshold, listThreshold );

  this.flowProcess = flowProcess;
  // NOTE(review): getSpillStrategy() is an overridable call made during construction —
  // subclasses overriding it must not rely on their own uninitialized state.
  this.spillStrategy = getSpillStrategy();
  this.tupleCollectionFactory = FactoryLoader.getInstance().loadFactoryFrom( flowProcess, TupleMapFactory.TUPLE_MAP_FACTORY, HadoopTupleCollectionFactory.class );
  }
/**
 * Creates the tuple collection that will hold values for the given grouping tuple.
 * When the factory yields a {@link Spillable} collection, it is wired with the
 * grouping, this map's spill listener, and the shared spill strategy.
 *
 * @param tuple the grouping tuple the new collection belongs to
 * @return the collection to store this grouping's value tuples in
 */
@Override
protected Collection<Tuple> createTupleCollection( Tuple tuple )
  {
  Collection<Tuple> collection = tupleCollectionFactory.create( flowProcess );

  if( collection instanceof Spillable )
    {
    Spillable spillable = (Spillable) collection;

    spillable.setGrouping( tuple );
    spillable.setSpillListener( getSpillListener() );
    spillable.setSpillStrategy( spillStrategy );
    }

  return collection;
  }
private void performMapTest( int numKeys, int listSize, int mapThreshold, int listThreshold, Configuration jobConf ) { jobConf.set( "io.serializations", TestSerialization.class.getName() + "," + WritableSerialization.class.getName() ); // disable/replace WritableSerialization class jobConf.set( "cascading.serialization.tokens", "1000=" + BooleanWritable.class.getName() + ",10001=" + Text.class.getName() ); // not using Text, just testing parsing HadoopFlowProcess flowProcess = new HadoopFlowProcess( jobConf ); HadoopSpillableTupleMap map = new HadoopSpillableTupleMap( SpillableProps.defaultMapInitialCapacity, SpillableProps.defaultMapLoadFactor, mapThreshold, listThreshold, flowProcess ); Set<Integer> keySet = new HashSet<Integer>(); Random gen = new Random( 1 ); for( int i = 0; i < listSize * numKeys; i++ ) { String aString = "string number " + i; double random = Math.random(); double keys = numKeys / 3.0; int key = (int) ( gen.nextDouble() * keys + gen.nextDouble() * keys + gen.nextDouble() * keys ); Tuple tuple = new Tuple( i, aString, random, new Text( aString ), new TestText( aString ), new Tuple( "inner tuple", new BytesWritable( aString.getBytes() ) ) ); map.get( new Tuple( key ) ).add( tuple ); keySet.add( key ); } // the list test above verifies the contents are being serialized, the Map is just a container of lists. assertEquals( "not equal: map.size();", keySet.size(), map.size() ); } }
/**
 * Creates a spillable tuple map for the Hadoop platform. The factory used to create
 * each grouping's tuple collection is looked up through the {@link FactoryLoader},
 * falling back to {@link HadoopTupleCollectionFactory} when none is configured.
 *
 * @param initialCapacity initial capacity passed to the backing map
 * @param loadFactor      load factor passed to the backing map
 * @param mapThreshold    number of keys at which spilling begins
 * @param listThreshold   number of tuples per list at which spilling begins
 * @param flowProcess     flow process retained for later collection creation
 */
public HadoopSpillableTupleMap( int initialCapacity, float loadFactor, int mapThreshold, int listThreshold, FlowProcess<? extends Configuration> flowProcess )
  {
  super( initialCapacity, loadFactor, mapThreshold, listThreshold );

  this.flowProcess = flowProcess;
  // NOTE(review): overridable getSpillStrategy() invoked from the constructor —
  // subclass overrides run before subclass fields are initialized.
  this.spillStrategy = getSpillStrategy();

  FactoryLoader factoryLoader = FactoryLoader.getInstance();

  this.tupleCollectionFactory = factoryLoader.loadFactoryFrom( flowProcess, TupleMapFactory.TUPLE_MAP_FACTORY, HadoopTupleCollectionFactory.class );
  }
/**
 * Produces the collection used to store value tuples for the given grouping.
 * If the factory-created collection is {@link Spillable}, its grouping, spill
 * listener, and spill strategy are configured before it is returned.
 *
 * @param tuple the grouping tuple owning the new collection
 * @return a tuple collection, possibly spill-aware
 */
@Override
protected Collection<Tuple> createTupleCollection( Tuple tuple )
  {
  Collection<Tuple> result = tupleCollectionFactory.create( flowProcess );

  if( result instanceof Spillable )
    {
    Spillable spillableResult = (Spillable) result;

    spillableResult.setGrouping( tuple );
    spillableResult.setSpillListener( getSpillListener() );
    spillableResult.setSpillStrategy( spillStrategy );
    }

  return result;
  }
/**
 * Computes the current per-list spill threshold. The budget {@code getMapThreshold()}
 * is divided by the number of keys so that total in-memory tuples stay bounded; the
 * result is clamped to [{@code minThreshold}, {@code getInitListThreshold()}].
 *
 * @return threshold at which each grouping's list should spill
 */
int current()
  {
  // Avoid ArithmeticException when the map is still empty (size() == 0).
  // NOTE(review): possibly unreachable if only called after the first insert —
  // confirm; clamping to 1 preserves behavior for all non-empty maps.
  int divisor = Math.max( size(), 1 );

  return Math.max( minThreshold, Math.min( getInitListThreshold(), getMapThreshold() / divisor ) );
  }
/**
 * Factory hook: returns a new {@link HadoopSpillableTupleMap} built from this
 * instance's capacity, load factor, map threshold, and list threshold.
 *
 * @param flowProcess the current flow process forwarded to the new map
 * @return a new spillable map of grouping tuples to tuple collections
 */
@Override
public Map<Tuple, Collection<Tuple>> create( FlowProcess<? extends Configuration> flowProcess )
  {
  return new HadoopSpillableTupleMap(
    capacity,
    loadFactor,
    mapThreshold,
    listThreshold,
    flowProcess );
  }
}