/**
 * Creates the {@code TupleBuilder} used to combine a values tuple with its grouping tuple.
 * <p>
 * When the value fields are unknown or the join fields are NONE, the returned builder writes the
 * group tuple into the given values tuple in place, using the enclosing instance's
 * {@code valueFields[ cleanPos ]} and {@code joinFields[ cleanPos ]} at call time. Otherwise the
 * builder holds one override view created from the given fields and resets it on every call.
 *
 * @param valueField the fields of the values tuple
 * @param joinField  the fields of the grouping tuple
 * @return a builder producing the merged result tuple
 */
private cascading.tuple.util.TupleBuilder makeBuilder( final Fields valueField, final Fields joinField ) {
  final boolean setInPlace = valueField.isUnknown() || joinField.isNone();

  if( !setInPlace ) {
    return new cascading.tuple.util.TupleBuilder() {
      // created once per builder and re-pointed at the current tuples on each call
      Tuple overrideView = TupleViews.createOverride( valueField, joinField );

      @Override
      public Tuple makeResult( Tuple valueTuple, Tuple groupTuple ) {
        return TupleViews.reset( overrideView, valueTuple, groupTuple );
      }
    };
  }

  return new cascading.tuple.util.TupleBuilder() {
    @Override
    public Tuple makeResult( Tuple valueTuple, Tuple groupTuple ) {
      // reads the enclosing instance's current position, not the method parameters
      valueTuple.set( valueFields[ cleanPos ], joinFields[ cleanPos ], groupTuple );

      return valueTuple;
    }
  };
}
/**
 * Concatenates the type arrays of the given fields into a single array of the given size.
 * <p>
 * Fields for which {@code isNone()} is true are skipped. If any remaining field has no types
 * ({@code types == null}), the whole result is {@code null}.
 *
 * @param size   length of the array to allocate for the joined types
 * @param fields the fields whose types are joined, in order
 * @return the joined types, or {@code null} if any non-NONE field declares no types
 */
private static Type[] joinTypes( int size, Fields... fields ) {
  Type[] joined = new Type[ size ];
  int offset = 0;

  for( int i = 0; i < fields.length; i++ ) {
    Fields current = fields[ i ];

    if( current.isNone() ) {
      continue;
    }

    Type[] currentTypes = current.types;

    if( currentTypes == null ) {
      return null;
    }

    int count = current.size();

    System.arraycopy( currentTypes, 0, joined, offset, count );
    offset += count;
  }

  return joined;
}
/**
 * Prepares this gate for use as a source.
 * <p>
 * Any role other than {@code IORole.source} is rejected. A {@code GroupByClosure} is then created
 * and, when the splice declares NONE as its join fields, handed to {@code grouping} as its
 * joiner closure.
 *
 * @throws UnsupportedOperationException if the role is not {@code IORole.source}
 */
@Override
public void prepare() {
  final boolean isSource = role == IORole.source;

  if( !isSource ) {
    throw new UnsupportedOperationException("Non-source group by not supported in GroupByInGate");
  }

  // NOTE(review): role is IORole.source at this point, so this guard looks always true - kept as in the original
  if( role != IORole.sink ) {
    closure = new GroupByClosure(flowProcess, keyFields, valuesFields);
  }

  if( grouping == null ) {
    return;
  }

  if( splice.getJoinDeclaredFields() == null ) {
    return;
  }

  if( splice.getJoinDeclaredFields().isNone() ) {
    grouping.joinerClosure = closure;
  }
}
private Fields createJoinFields( Set<Scope> incomingScopes, Map<String, Fields> groupingSelectors, Fields declared ) { if( declared.isNone() ) declared = Fields.UNKNOWN; Map<String, Fields> incomingFields = new HashMap<String, Fields>(); for( Scope scope : incomingScopes ) incomingFields.put( scope.getName(), scope.getIncomingSpliceFields() ); Fields outGroupingFields = Fields.NONE; int offset = 0; for( Pipe pipe : pipes ) // need to retain order of pipes { String pipeName = pipe.getName(); Fields pipeGroupingSelector = groupingSelectors.get( pipeName ); Fields incomingField = incomingFields.get( pipeName ); if( !pipeGroupingSelector.isNone() ) { Fields offsetFields = incomingField.selectPos( pipeGroupingSelector, offset ); Fields resolvedSelect = declared.select( offsetFields ); outGroupingFields = outGroupingFields.append( resolvedSelect ); } offset += incomingField.size(); } return outGroupingFields; }
/**
 * Method isArgSelector returns true if this instance can be used as an argument selector.
 * Specifically if it is 'defined' or is one of the field sets {@link #ALL}, {@link #NONE},
 * {@link #GROUP}, or {@link #VALUES}.
 *
 * @return the argSelector (type boolean) of this Fields object.
 */
public boolean isArgSelector() {
  if( isAll() || isNone() ) {
    return true;
  }

  if( isGroup() || isValues() ) {
    return true;
  }

  return isDefined();
}
/**
 * Prepares this gate for use as a source.
 * <p>
 * Any role other than {@code IORole.source} is rejected. A {@code CoGroupBufferClosure} is then
 * created using the splice's number of self joins and, when the splice declares NONE as its join
 * fields, handed to {@code grouping} as its joiner closure.
 *
 * @throws UnsupportedOperationException if the role is not {@code IORole.source}
 */
@Override
public void prepare() {
  final boolean isSource = role == IORole.source;

  if( !isSource ) {
    throw new UnsupportedOperationException("Non-source group by not supported in CoGroupBufferInGate");
  }

  // NOTE(review): role is IORole.source at this point, so this guard looks always true - kept as in the original
  if( role != IORole.sink ) {
    closure = new CoGroupBufferClosure(flowProcess, this.getSplice().getNumSelfJoins(), keyFields, valuesFields);
  }

  if( grouping == null ) {
    return;
  }

  if( splice.getJoinDeclaredFields() == null ) {
    return;
  }

  if( splice.getJoinDeclaredFields().isNone() ) {
    grouping.joinerClosure = closure;
  }
}
/**
 * Prepares this gate according to its role.
 * <p>
 * Unless the role is {@code IORole.source}, a measured output collector (timed under
 * {@code SliceCounters.Write_Duration}) is created. Unless the role is {@code IORole.sink}, a
 * closure is created. When the splice declares NONE as its join fields, the closure is handed to
 * {@code grouping} as its joiner closure.
 */
@Override
public void prepare() {
  if( role != IORole.source ) {
    collector = new MeasuredOutputCollector( flowProcess, SliceCounters.Write_Duration, createOutputCollector() );
  }

  if( role != IORole.sink ) {
    closure = createClosure();
  }

  if( grouping == null ) {
    return;
  }

  if( splice.getJoinDeclaredFields() == null ) {
    return;
  }

  if( splice.getJoinDeclaredFields().isNone() ) {
    grouping.joinerClosure = closure;
  }
}
/**
 * Sets up the collector and closure this gate needs for its role.
 * <p>
 * A measured output collector (timed under {@code SliceCounters.Write_Duration}) is created for
 * every role except {@code IORole.source}; a closure is created for every role except
 * {@code IORole.sink}. The closure becomes the joiner closure of {@code grouping} when the
 * splice's declared join fields are NONE.
 */
@Override
public void prepare() {
  final boolean needsCollector = role != IORole.source;
  final boolean needsClosure = role != IORole.sink;

  if( needsCollector ) {
    collector = new MeasuredOutputCollector( flowProcess, SliceCounters.Write_Duration, createOutputCollector() );
  }

  if( needsClosure ) {
    closure = createClosure();
  }

  if( grouping != null ) {
    if( splice.getJoinDeclaredFields() != null && splice.getJoinDeclaredFields().isNone() ) {
      grouping.joinerClosure = closure;
    }
  }
}
/** * Method selectTupleCopy selects the fields specified in selector from this instance. * <p> * It is guaranteed to return a new modifiable Tuple instance at a cost of copying data. * <p> * The returned instance is safe to cache. * * @param selector Fields selector that selects the values to return * @return Tuple */ public Tuple selectTupleCopy( Fields selector ) { if( selector == null || selector.isAll() || fields == selector ) // == is intentional return new Tuple( this.tuple ); if( selector.isNone() ) return new Tuple(); return tuple.get( fields, selector ); }
private DataSet<?> prepareCoGroupInput(List<DataSet<Tuple>> inputs, FlowNode node, int dop) { CoGroup coGroup = (CoGroup)getSingle(node.getSinkElements()); Joiner joiner = coGroup.getJoiner(); int numJoinInputs = coGroup.isSelfJoin() ? coGroup.getNumSelfJoins() + 1 : inputs.size(); Fields[] inputFields = new Fields[numJoinInputs]; Fields[] keyFields = new Fields[numJoinInputs]; String[][] flinkKeys = new String[numJoinInputs][]; List<DataSet<Tuple>> joinInputs = computeSpliceInputsFieldsKeys(coGroup, node, inputs, inputFields, keyFields, flinkKeys); if(joiner.getClass().equals(InnerJoin.class)) { if(!keyFields[0].isNone()) { return prepareFullOuterCoGroupInput(joinInputs, node, inputFields, keyFields, flinkKeys, dop); } else { // Cartesian product return prepareInnerCrossInput(joinInputs, node, inputFields, dop); } } else if(joiner.getClass().equals(BufferJoin.class)) { return prepareBufferCoGroupInput(joinInputs, node, inputFields, keyFields, flinkKeys, dop); } else { return prepareFullOuterCoGroupInput(joinInputs, node, inputFields, keyFields, flinkKeys, dop); } }
/**
 * Method selectInto appends the values selected by the given selector from this instance to the
 * given tuple argument.
 * <p>
 * A {@link Fields#NONE} selector leaves the given tuple untouched. If the selector resolves to no
 * positions ({@code null} or empty), every value of this instance's tuple is added.
 *
 * @param selector of type Fields
 * @param tuple    of type Tuple
 * @return returns the given tuple argument with new values added
 */
public Tuple selectInto( Fields selector, Tuple tuple ) {
  if( !selector.isNone() ) {
    int[] positions = this.tuple.getPos( fields, selector );
    boolean copyEverything = positions == null || positions.length == 0;

    if( copyEverything ) {
      tuple.addAll( this.tuple );
    } else {
      for( int index = 0; index < positions.length; index++ ) {
        tuple.add( this.tuple.getObject( positions[ index ] ) );
      }
    }
  }

  return tuple;
}
/** * Method selectTuple selects the fields specified in the selector from this instance. If {@link Fields#ALL} or the * same fields as declared are given, {@code this.getTuple()} will be returned. * <p> * The returned Tuple will be either modifiable or unmodifiable, depending on the state of this TupleEntry instance. * <p> * See {@link #selectTupleCopy(Fields)} to guarantee a copy suitable for modifying or caching/storing in a collection. * <p> * Note this is a bug fix and change from 2.0 and 2.1. In previous versions the modifiable state was dependent * on the given selector. * * @param selector Fields selector that selects the values to return * @return Tuple */ public Tuple selectTuple( Fields selector ) { if( selector == null || selector.isAll() || fields == selector ) // == is intentional return this.tuple; if( selector.isNone() ) return isUnmodifiable ? Tuple.NULL : new Tuple(); Tuple result = tuple.get( fields, selector ); if( isUnmodifiable ) Tuples.asUnmodifiable( result ); return result; }
/**
 * Prepares this gate after delegating to the superclass.
 * <p>
 * The key values array is created, the closure is replaced with a {@code MemoryCoGroupClosure}
 * built from the splice's number of self joins, and that closure is handed to {@code grouping} as
 * its joiner closure when the splice's declared join fields are NONE.
 */
@Override
public void prepare() {
  super.prepare();

  keyValues = createKeyValuesArray();

  closure = new MemoryCoGroupClosure( flowProcess, splice.getNumSelfJoins(), keyFields, valuesFields );

  if( grouping == null ) {
    return;
  }

  if( splice.getJoinDeclaredFields() == null ) {
    return;
  }

  if( splice.getJoinDeclaredFields().isNone() ) {
    grouping.joinerClosure = closure;
  }
}
/**
 * Method isDeclarator returns true if this can be used as a declarator. Specifically if it is 'defined' or
 * {@link #UNKNOWN}, {@link #NONE}, {@link #ALL}, {@link #ARGS}, {@link #GROUP}, or {@link #VALUES}.
 *
 * @return the declarator (type boolean) of this Fields object.
 */
public boolean isDeclarator() {
  if( isUnknown() || isNone() ) {
    return true;
  }

  if( isAll() || isArguments() ) {
    return true;
  }

  if( isGroup() || isValues() ) {
    return true;
  }

  return isDefined();
}
/** * Method selectEntry selects the fields specified in the selector from this instance. If {@link Fields#ALL} or the * same fields as declared are given, {@code this} will be returned. * <p> * The returned TupleEntry will be either modifiable or unmodifiable, depending on the state of this TupleEntry instance. * <p> * See {@link #selectEntryCopy(Fields)} to guarantee a copy suitable for modifying or caching/storing in a collection. * <p> * Note this is a bug fix and change from 2.0 and 2.1. In previous versions the modifiable state was dependent * on the given selector. * * @param selector Fields selector that selects the values to return * @return TupleEntry */ public TupleEntry selectEntry( Fields selector ) { if( selector == null || selector.isAll() || fields == selector ) // == is intentional return this; if( selector.isNone() ) return isUnmodifiable ? TupleEntry.NULL : new TupleEntry(); return new TupleEntry( Fields.asDeclaration( selector ), tuple.get( this.fields, selector ), isUnmodifiable ); }
/** * Method selectEntry selects the fields specified in selector from this instance. * <p> * It is guaranteed to return a new modifiable TupleEntry instance at a cost of copying data. * <p> * The returned instance is safe to cache. * * @param selector Fields selector that selects the values to return * @return TupleEntry */ public TupleEntry selectEntryCopy( Fields selector ) { if( selector == null || selector.isAll() || fields == selector ) // == is intentional return new TupleEntry( this ); if( selector.isNone() ) return new TupleEntry(); return new TupleEntry( Fields.asDeclaration( selector ), tuple.get( this.fields, selector ) ); }
/**
 * Resolves the selector registered for each incoming scope into concrete fields.
 * <p>
 * Resolution per selector kind:
 * <ul>
 * <li>NONE: {@code Fields.NONE}</li>
 * <li>ALL: the scope's incoming splice fields</li>
 * <li>GROUP: the scope's outgoing grouping fields</li>
 * <li>VALUES: the scope's outgoing values fields minus its outgoing grouping fields</li>
 * <li>anything else: the selector applied to the scope's incoming splice fields</li>
 * </ul>
 *
 * @param incomingScopes the scopes to resolve against
 * @param selectors      selector per scope name
 * @param type           label for the kind of selector, used only in the error message
 * @return resolved fields per scope name
 * @throws OperatorException if a scope has no selector registered under its name
 */
private Map<String, Fields> resolveSelectorsAgainstIncoming( Set<Scope> incomingScopes, Map<String, Fields> selectors, String type ) {
  Map<String, Fields> resolvedByScope = new HashMap<String, Fields>();

  for( Scope scope : incomingScopes ) {
    Fields selector = selectors.get( scope.getName() );

    if( selector == null ) {
      throw new OperatorException( this, "no " + type + " selector found for: " + scope.getName() );
    }

    final Fields resolved;

    if( selector.isNone() ) {
      resolved = Fields.NONE;
    } else if( selector.isAll() ) {
      resolved = scope.getIncomingSpliceFields();
    } else if( selector.isGroup() ) {
      resolved = scope.getOutGroupingFields();
    } else if( selector.isValues() ) {
      Fields outValues = scope.getOutValuesFields();

      resolved = outValues.subtract( scope.getOutGroupingFields() );
    } else {
      Fields incoming = scope.getIncomingSpliceFields();

      resolved = incoming.select( selector );
    }

    resolvedByScope.put( scope.getName(), resolved );
  }

  return resolvedByScope;
}
/**
 * Method asDeclaration returns a Fields instance for use as a declarator based on the given fields value.
 * <p>
 * Typically this is used to convert a selector to a declarator. Simply, all numeric position fields are replaced
 * by their absolute position.
 * <p>
 * A {@code null} argument yields {@code null}. NONE and already ordered fields are returned as given, and
 * fields that are not 'defined' yield {@link #UNKNOWN}. Only in the remaining case is a new instance created.
 * <p>
 * Comparators are preserved in the result.
 *
 * @param fields of type Fields
 * @return Fields
 */
public static Fields asDeclaration( Fields fields ) {
  if( fields == null ) {
    return null;
  }

  if( fields.isNone() ) {
    return fields;
  } else if( !fields.isDefined() ) {
    return UNKNOWN;
  } else if( fields.isOrdered() ) {
    return fields;
  }

  Fields declaration = size( fields.size() );

  copy( null, declaration, fields, 0 );

  // the comparators array is shared with the given fields, the types are copied
  declaration.comparators = fields.comparators;
  declaration.types = copyTypes( fields.types, declaration.size() );

  return declaration;
}
@Override public Scope outgoingScopeFor( Set<Scope> incomingScopes ) { Scope incomingScope = getFirst( incomingScopes ); if( !isBuffer() && incomingScope.getOutValuesFields().isNone() ) throw new OperatorException( this, "only a Buffer may be preceded by a CoGroup declaring Fields.NONE as the join fields" ); Fields argumentFields = resolveArgumentSelector( incomingScopes ); verifyArguments( argumentFields ); // we currently don't support using result from a previous Every in the current Every verifyAggregatorArguments( argumentFields, incomingScope ); Fields declaredFields = resolveDeclared( incomingScopes, argumentFields ); verifyDeclaredFields( declaredFields ); Fields outgoingGroupingFields = resolveOutgoingGroupingSelector( incomingScopes, argumentFields, declaredFields ); verifyOutputSelector( outgoingGroupingFields ); Fields outgoingValuesFields = incomingScope.getOutValuesFields(); // the incoming fields eligible to be outgoing, for Every only the grouping fields. Fields passThroughFields = resolveIncomingOperationPassThroughFields( incomingScope ); Fields remainderFields = resolveRemainderFields( incomingScopes, argumentFields ); return new Scope( getName(), Scope.Kind.EVERY, passThroughFields, remainderFields, argumentFields, declaredFields, outgoingGroupingFields, outgoingValuesFields ); }