/**
 * Builds a new {@link HadoopSpillableTupleMap} using this factory's configured
 * capacity, load factor, and spill thresholds.
 *
 * @param flowProcess the active flow process, handed to the map so it can resolve
 *                    serialization and spill settings at runtime
 * @return a fresh map from grouping {@code Tuple} to its collection of value tuples
 */
@Override
public Map<Tuple, Collection<Tuple>> create( FlowProcess<? extends Configuration> flowProcess )
  {
  HadoopSpillableTupleMap tupleMap = new HadoopSpillableTupleMap( capacity, loadFactor, mapThreshold, listThreshold, flowProcess );

  return tupleMap;
  }
}
/**
 * Returns the current per-list spill threshold, scaled inversely with the number of
 * keys in the map so the total number of in-memory tuples stays near the map
 * threshold, clamped between {@code minThreshold} and the initial list threshold.
 *
 * @return the spill threshold to apply to each grouping's tuple list
 */
int current()
  {
  // Guard against divide-by-zero: size() is 0 before the first key is inserted.
  // NOTE(review): callers may already guarantee a non-empty map — confirm; the
  // guard is harmless either way since an empty map has no lists to spill.
  int keyCount = Math.max( size(), 1 );

  return Math.max( minThreshold, Math.min( getInitListThreshold(), getMapThreshold() / keyCount ) );
  }
private void performMapTest( int numKeys, int listSize, int mapThreshold, int listThreshold, Configuration jobConf ) { jobConf.set( "io.serializations", TestSerialization.class.getName() + "," + WritableSerialization.class.getName() ); // disable/replace WritableSerialization class jobConf.set( "cascading.serialization.tokens", "1000=" + BooleanWritable.class.getName() + ",10001=" + Text.class.getName() ); // not using Text, just testing parsing HadoopFlowProcess flowProcess = new HadoopFlowProcess( jobConf ); HadoopSpillableTupleMap map = new HadoopSpillableTupleMap( SpillableProps.defaultMapInitialCapacity, SpillableProps.defaultMapLoadFactor, mapThreshold, listThreshold, flowProcess ); Set<Integer> keySet = new HashSet<Integer>(); Random gen = new Random( 1 ); for( int i = 0; i < listSize * numKeys; i++ ) { String aString = "string number " + i; double random = Math.random(); double keys = numKeys / 3.0; int key = (int) ( gen.nextDouble() * keys + gen.nextDouble() * keys + gen.nextDouble() * keys ); Tuple tuple = new Tuple( i, aString, random, new Text( aString ), new TestText( aString ), new Tuple( "inner tuple", new BytesWritable( aString.getBytes() ) ) ); map.get( new Tuple( key ) ).add( tuple ); keySet.add( key ); } // the list test above verifies the contents are being serialized, the Map is just a container of lists. assertEquals( "not equal: map.size();", keySet.size(), map.size() ); } }
/**
 * Constructs a spillable tuple map whose per-key collections are produced by a
 * {@code TupleMapFactory} resolved from the flow process, defaulting to
 * {@link HadoopTupleCollectionFactory}.
 *
 * @param initialCapacity initial capacity of the backing map
 * @param loadFactor      load factor of the backing map
 * @param mapThreshold    key count at which the map begins spilling
 * @param listThreshold   tuple count at which an individual list spills
 * @param flowProcess     current flow process, retained for collection creation
 */
public HadoopSpillableTupleMap( int initialCapacity, float loadFactor, int mapThreshold, int listThreshold, FlowProcess<? extends Configuration> flowProcess )
  {
  super( initialCapacity, loadFactor, mapThreshold, listThreshold );

  this.flowProcess = flowProcess;
  // NOTE(review): getSpillStrategy() is an overridable call made during construction —
  // subclasses overriding it must not rely on their own uninitialized state.
  this.spillStrategy = getSpillStrategy();
  this.tupleCollectionFactory = FactoryLoader.getInstance().loadFactoryFrom( flowProcess, TupleMapFactory.TUPLE_MAP_FACTORY, HadoopTupleCollectionFactory.class );
  }
/**
 * Creates the tuple collection that will hold values for the given grouping tuple.
 * When the factory yields a {@link Spillable} collection, it is wired with the
 * grouping, this map's spill listener, and the shared spill strategy.
 *
 * @param tuple the grouping tuple the new collection belongs to
 * @return the collection to store this grouping's value tuples in
 */
@Override
protected Collection<Tuple> createTupleCollection( Tuple tuple )
  {
  Collection<Tuple> collection = tupleCollectionFactory.create( flowProcess );

  if( collection instanceof Spillable )
    {
    Spillable spillable = (Spillable) collection;

    spillable.setGrouping( tuple );
    spillable.setSpillListener( getSpillListener() );
    spillable.setSpillStrategy( spillStrategy );
    }

  return collection;
  }
private void performMapTest( int numKeys, int listSize, int mapThreshold, int listThreshold, Configuration jobConf ) { jobConf.set( "io.serializations", TestSerialization.class.getName() + "," + WritableSerialization.class.getName() ); // disable/replace WritableSerialization class jobConf.set( "cascading.serialization.tokens", "1000=" + BooleanWritable.class.getName() + ",10001=" + Text.class.getName() ); // not using Text, just testing parsing HadoopFlowProcess flowProcess = new HadoopFlowProcess( jobConf ); HadoopSpillableTupleMap map = new HadoopSpillableTupleMap( SpillableProps.defaultMapInitialCapacity, SpillableProps.defaultMapLoadFactor, mapThreshold, listThreshold, flowProcess ); Set<Integer> keySet = new HashSet<Integer>(); Random gen = new Random( 1 ); for( int i = 0; i < listSize * numKeys; i++ ) { String aString = "string number " + i; double random = Math.random(); double keys = numKeys / 3.0; int key = (int) ( gen.nextDouble() * keys + gen.nextDouble() * keys + gen.nextDouble() * keys ); Tuple tuple = new Tuple( i, aString, random, new Text( aString ), new TestText( aString ), new Tuple( "inner tuple", new BytesWritable( aString.getBytes() ) ) ); map.get( new Tuple( key ) ).add( tuple ); keySet.add( key ); } // the list test above verifies the contents are being serialized, the Map is just a container of lists. assertEquals( "not equal: map.size();", keySet.size(), map.size() ); } }
/**
 * Creates a spillable tuple map for the Hadoop platform. The factory used to create
 * each grouping's tuple collection is looked up through the {@link FactoryLoader},
 * falling back to {@link HadoopTupleCollectionFactory} when none is configured.
 *
 * @param initialCapacity initial capacity passed to the backing map
 * @param loadFactor      load factor passed to the backing map
 * @param mapThreshold    number of keys at which spilling begins
 * @param listThreshold   number of tuples per list at which spilling begins
 * @param flowProcess     flow process retained for later collection creation
 */
public HadoopSpillableTupleMap( int initialCapacity, float loadFactor, int mapThreshold, int listThreshold, FlowProcess<? extends Configuration> flowProcess )
  {
  super( initialCapacity, loadFactor, mapThreshold, listThreshold );

  this.flowProcess = flowProcess;
  // NOTE(review): overridable getSpillStrategy() invoked from the constructor —
  // subclass overrides run before subclass fields are initialized.
  this.spillStrategy = getSpillStrategy();

  FactoryLoader factoryLoader = FactoryLoader.getInstance();

  this.tupleCollectionFactory = factoryLoader.loadFactoryFrom( flowProcess, TupleMapFactory.TUPLE_MAP_FACTORY, HadoopTupleCollectionFactory.class );
  }
/**
 * Produces the collection used to store value tuples for the given grouping.
 * If the factory-created collection is {@link Spillable}, its grouping, spill
 * listener, and spill strategy are configured before it is returned.
 *
 * @param tuple the grouping tuple owning the new collection
 * @return a tuple collection, possibly spill-aware
 */
@Override
protected Collection<Tuple> createTupleCollection( Tuple tuple )
  {
  Collection<Tuple> result = tupleCollectionFactory.create( flowProcess );

  if( result instanceof Spillable )
    {
    Spillable spillableResult = (Spillable) result;

    spillableResult.setGrouping( tuple );
    spillableResult.setSpillListener( getSpillListener() );
    spillableResult.setSpillStrategy( spillStrategy );
    }

  return result;
  }
/**
 * Computes the current per-list spill threshold. The budget {@code getMapThreshold()}
 * is divided by the number of keys so that total in-memory tuples stay bounded; the
 * result is clamped to [{@code minThreshold}, {@code getInitListThreshold()}].
 *
 * @return threshold at which each grouping's list should spill
 */
int current()
  {
  // Avoid ArithmeticException when the map is still empty (size() == 0).
  // NOTE(review): possibly unreachable if only called after the first insert —
  // confirm; clamping to 1 preserves behavior for all non-empty maps.
  int divisor = Math.max( size(), 1 );

  return Math.max( minThreshold, Math.min( getInitListThreshold(), getMapThreshold() / divisor ) );
  }
/**
 * Factory hook: returns a new {@link HadoopSpillableTupleMap} built from this
 * instance's capacity, load factor, map threshold, and list threshold.
 *
 * @param flowProcess the current flow process forwarded to the new map
 * @return a new spillable map of grouping tuples to tuple collections
 */
@Override
public Map<Tuple, Collection<Tuple>> create( FlowProcess<? extends Configuration> flowProcess )
  {
  return new HadoopSpillableTupleMap(
    capacity,
    loadFactor,
    mapThreshold,
    listThreshold,
    flowProcess );
  }
}