protected void initLists() { collections = new Collection[ size() ]; // handle self joins if( numSelfJoins != 0 ) { Arrays.fill( collections, createTupleCollection( joinFields[ 0 ] ) ); } else { collections[ 0 ] = new FalseCollection(); // we iterate this only once per grouping for( int i = 1; i < joinFields.length; i++ ) collections[ i ] = createTupleCollection( joinFields[ i ] ); } joinedBuilder = makeJoinedBuilder( joinFields ); joinedTuplesArray = new Tuple[ collections.length ]; }
/**
 * Builds the Hadoop-specific co-group closure backing this gate.
 *
 * @return a new {@link HadoopCoGroupClosure} configured from the current splice
 */
@Override
protected HadoopCoGroupClosure createClosure()
  {
  int selfJoins = splice.getNumSelfJoins();

  return new HadoopCoGroupClosure( flowProcess, selfJoins, keyFields, valuesFields );
  }
/**
 * Resets this closure for the next grouping key, then eagerly partitions the incoming
 * values into their per-stream collections via {@link #build()}.
 *
 * @param grouping the current grouping {@link Tuple}
 * @param values   the raw value iterators, one per co-grouped stream
 */
@Override
public void reset( Tuple grouping, Iterator<Tuple>[] values )
  {
  super.reset( grouping, values );

  build();
  }
/**
 * Creates a {@link TupleBuilder} that presents the joined streams as one composite tuple view.
 * <p>
 * For a self join the single join field is replicated across all positions; otherwise the
 * given field selectors are used as-is. The returned builder reuses a single composite
 * tuple instance, resetting it on every call.
 */
private TupleBuilder makeJoinedBuilder( final Fields[] joinFields )
  {
  final Fields[] fields;

  if( isSelfJoin() )
    {
    fields = new Fields[ size() ];
    Arrays.fill( fields, 0, fields.length, joinFields[ 0 ] );
    }
  else
    {
    fields = joinFields;
    }

  return new TupleBuilder()
    {
    Tuple result = TupleViews.createComposite( fields );

    @Override
    public Tuple makeResult( Tuple[] tuples )
      {
      return TupleViews.reset( result, tuples );
      }
    };
  }
/**
 * Wraps the parent-created collection in a {@link LazySpillableTupleCollection} so that
 * spill behavior is deferred until the values are actually materialized.
 */
@Override
protected Collection<Tuple> createTupleCollection( Fields joinField )
  {
  Collection<Tuple> parent = super.createTupleCollection( joinField );

  return new LazySpillableTupleCollection( parent );
  }
}
protected void build() { clearGroups(); if( collections[ 0 ] instanceof FalseCollection ) // force reset on FalseCollection ( (FalseCollection) collections[ 0 ] ).reset( null ); while( values[ 0 ].hasNext() ) { IndexTuple current = (IndexTuple) values[ 0 ].next(); int pos = current.getIndex(); // if this is the first (lhs) co-group, just use values iterator // we are guaranteed all the remainder tuples in the iterator are from pos == 0 if( numSelfJoins == 0 && pos == 0 ) { ( (FalseCollection) collections[ 0 ] ).reset( createIterator( current, values[ 0 ] ) ); break; } collections[ pos ].add( current.getTuple() ); // get the value tuple for this cogroup } }
/**
 * Creates a tuple collection for the given join field via the configured factory,
 * attaching a spill listener when the returned collection supports spilling.
 */
protected Collection<Tuple> createTupleCollection( Fields joinField )
  {
  Collection<Tuple> result = tupleCollectionFactory.create( flowProcess );

  if( result instanceof Spillable )
    ( (Spillable) result ).setSpillListener( createListener( joinField ) );

  return result;
  }
/**
 * Returns an iterator over the tuples gathered for the given stream position.
 *
 * @param pos the co-group stream position
 * @return an iterator over the collected tuples for {@code pos}
 * @throws IllegalArgumentException if {@code pos} is outside {@code [0, collections.length)}
 */
@Override
public Iterator<Tuple> getIterator( int pos )
  {
  boolean inRange = pos >= 0 && pos < collections.length;

  if( !inRange )
    throw new IllegalArgumentException( "invalid group position: " + pos );

  return makeIterator( pos, collections[ pos ].iterator() );
  }
/**
 * Constructs a closure over the given co-grouped streams.
 *
 * @param flowProcess    the current flow process, used to resolve the tuple collection factory
 * @param numSelfJoins   number of self joins; non-zero switches to shared-collection handling
 * @param groupingFields grouping field selectors, one per stream
 * @param valueFields    value field selectors, one per stream
 */
public HadoopCoGroupClosure( FlowProcess flowProcess, int numSelfJoins, Fields[] groupingFields, Fields[] valueFields )
  {
  super( flowProcess, groupingFields, valueFields );

  this.numSelfJoins = numSelfJoins;
  this.emptyTuple = Tuple.size( groupingFields[ 0 ].size() );

  FactoryLoader factoryLoader = FactoryLoader.getInstance();

  this.tupleCollectionFactory = factoryLoader.loadFactoryFrom( flowProcess, TUPLE_COLLECTION_FACTORY, HadoopTupleCollectionFactory.class );

  initLists();
  }
/**
 * Wraps the parent-created collection in a {@link LazySpillableTupleCollection} so that
 * spill behavior is deferred until the values are actually materialized.
 */
@Override
protected Collection<Tuple> createTupleCollection( Fields joinField )
  {
  Collection<Tuple> parent = super.createTupleCollection( joinField );

  return new LazySpillableTupleCollection( parent );
  }
}
/**
 * Creates a {@link TupleBuilder} that presents the joined streams as one composite tuple view.
 * <p>
 * For a self join the single join field is replicated across all positions; otherwise the
 * given field selectors are used as-is. The returned builder reuses a single composite
 * tuple instance, resetting it on every call.
 */
private TupleBuilder makeJoinedBuilder( final Fields[] joinFields )
  {
  final Fields[] fields;

  if( isSelfJoin() )
    {
    fields = new Fields[ size() ];
    Arrays.fill( fields, 0, fields.length, joinFields[ 0 ] );
    }
  else
    {
    fields = joinFields;
    }

  return new TupleBuilder()
    {
    Tuple result = TupleViews.createComposite( fields );

    @Override
    public Tuple makeResult( Tuple[] tuples )
      {
      return TupleViews.reset( result, tuples );
      }
    };
  }
protected void build() { clearGroups(); if( collections[ 0 ] instanceof FalseCollection ) // force reset on FalseCollection ( (FalseCollection) collections[ 0 ] ).reset( null ); while( values[ 0 ].hasNext() ) { IndexTuple current = (IndexTuple) values[ 0 ].next(); int pos = current.getIndex(); // if this is the first (lhs) co-group, just use values iterator // we are guaranteed all the remainder tuples in the iterator are from pos == 0 if( numSelfJoins == 0 && pos == 0 ) { ( (FalseCollection) collections[ 0 ] ).reset( createIterator( current, values[ 0 ] ) ); break; } collections[ pos ].add( current.getTuple() ); // get the value tuple for this cogroup } }
/**
 * Creates a tuple collection for the given join field via the configured factory,
 * attaching a spill listener when the returned collection supports spilling.
 */
protected Collection<Tuple> createTupleCollection( Fields joinField )
  {
  Collection<Tuple> result = tupleCollectionFactory.create( flowProcess );

  if( result instanceof Spillable )
    ( (Spillable) result ).setSpillListener( createListener( joinField ) );

  return result;
  }
/**
 * Returns an iterator over the tuples gathered for the given stream position.
 *
 * @param pos the co-group stream position
 * @return an iterator over the collected tuples for {@code pos}
 * @throws IllegalArgumentException if {@code pos} is outside {@code [0, collections.length)}
 */
@Override
public Iterator<Tuple> getIterator( int pos )
  {
  boolean inRange = pos >= 0 && pos < collections.length;

  if( !inRange )
    throw new IllegalArgumentException( "invalid group position: " + pos );

  return makeIterator( pos, collections[ pos ].iterator() );
  }
/**
 * Constructs a closure over the given co-grouped streams.
 *
 * @param flowProcess    the current flow process, used to resolve the tuple collection factory
 * @param numSelfJoins   number of self joins; non-zero switches to shared-collection handling
 * @param groupingFields grouping field selectors, one per stream
 * @param valueFields    value field selectors, one per stream
 */
public HadoopCoGroupClosure( FlowProcess flowProcess, int numSelfJoins, Fields[] groupingFields, Fields[] valueFields )
  {
  super( flowProcess, groupingFields, valueFields );

  this.numSelfJoins = numSelfJoins;
  this.emptyTuple = Tuple.size( groupingFields[ 0 ].size() );

  FactoryLoader factoryLoader = FactoryLoader.getInstance();

  this.tupleCollectionFactory = factoryLoader.loadFactoryFrom( flowProcess, TUPLE_COLLECTION_FACTORY, HadoopTupleCollectionFactory.class );

  initLists();
  }
protected void initLists() { collections = new Collection[ size() ]; // handle self joins if( numSelfJoins != 0 ) { Arrays.fill( collections, createTupleCollection( joinFields[ 0 ] ) ); } else { collections[ 0 ] = new FalseCollection(); // we iterate this only once per grouping for( int i = 1; i < joinFields.length; i++ ) collections[ i ] = createTupleCollection( joinFields[ i ] ); } joinedBuilder = makeJoinedBuilder( joinFields ); joinedTuplesArray = new Tuple[ collections.length ]; }
/**
 * Builds the Hadoop-specific co-group closure backing this gate.
 *
 * @return a new {@link HadoopCoGroupClosure} configured from the current splice
 */
@Override
protected HadoopCoGroupClosure createClosure()
  {
  int selfJoins = splice.getNumSelfJoins();

  return new HadoopCoGroupClosure( flowProcess, selfJoins, keyFields, valuesFields );
  }
/**
 * Resets this closure for the next grouping key, then eagerly partitions the incoming
 * values into their per-stream collections via {@link #build()}.
 *
 * @param grouping the current grouping {@link Tuple}
 * @param values   the raw value iterators, one per co-grouped stream
 */
@Override
public void reset( Tuple grouping, Iterator<Tuple>[] values )
  {
  super.reset( grouping, values );

  build();
  }