/**
 * Resolves the stable vertex name for the given element within its owning graph.
 *
 * @param elementGraph graph that owns the element
 * @param flowElement  element whose vertex id is requested
 * @return the name assigned by the vertex id provider
 */
private String getVertexID( ElementGraph elementGraph, FlowElement flowElement )
  {
  Pair<ElementGraph, FlowElement> key = new Pair<>( elementGraph, flowElement );

  return vertexIDProvider.getVertexName( key );
  }
/**
 * Resets the running count held in the aggregation context to zero at the
 * start of each grouping.
 */
@Override
public void start( FlowProcess flowProcess, AggregatorCall<Pair<Long[], Tuple>> aggregatorCall )
  {
  Long[] count = aggregatorCall.getContext().getLhs();

  count[ 0 ] = 0L;
  }
/**
 * Bumps the running count by one for every value seen in the current grouping.
 */
@Override
public void aggregate( FlowProcess flowProcess, AggregatorCall<Pair<Long[], Tuple>> aggregatorCall )
  {
  Long[] count = aggregatorCall.getContext().getLhs();

  count[ 0 ] += 1L;
  }
/**
 * Allocates the reusable aggregation context: a one-slot counter initialized
 * to zero paired with a single-element result tuple.
 */
@Override
public void prepare( FlowProcess flowProcess, OperationCall<Pair<Long[], Tuple>> operationCall )
  {
  Long[] counter = new Long[]{0L};

  operationCall.setContext( new Pair<Long[], Tuple>( counter, Tuple.size( 1 ) ) );
  }
/**
 * Adds the current argument to the running sum, skipping null arguments.
 * A null running sum is treated as zero so the first non-null value seeds it.
 */
@Override
public void aggregate( FlowProcess flowProcess, AggregatorCall<Pair<Double[], Tuple>> aggregatorCall )
  {
  TupleEntry arguments = aggregatorCall.getArguments();

  // null arguments do not contribute to the sum
  if( arguments.getObject( 0 ) == null )
    return;

  Double[] sum = aggregatorCall.getContext().getLhs();
  double current = sum[ 0 ] == null ? 0 : sum[ 0 ];

  sum[ 0 ] = current + arguments.getDouble( 0 );
  }
// NOTE(review): fragment — traces the pair of nodes about to be returned, then
// hands back the previous N1/N2 pair. Presumably the tail of an iterator-style
// next() method; confirm against the enclosing definition, which is not
// visible in this view.
LOG.trace( "next N1: {}, N2: {}", prevN1, prevN2 ); return new Pair<>( prevN1, prevN2 );
private void assertGraphs( SimpleDirectedGraph<String, Object> full, SimpleDirectedGraph<String, Object> contracted, SimpleDirectedGraph<String, Object> result ) { // Set<String> vertices = ElementGraphs.findClosureViaBiConnected( full, contracted ); Set<String> vertices = ElementGraphs.findClosureViaFloydWarshall( full, contracted ).getLhs(); // Set<String> vertices = ElementGraphs.findClosureViaKShortest( full, contracted ); DirectedSubgraph<String, Object> subgraph = new DirectedSubgraph<>( full, vertices, null ); // System.out.println( "subgraph = " + subgraph ); SimpleDirectedGraph<String, Object> clone = new SimpleDirectedGraph<>( Object.class ); Graphs.addGraph( clone, subgraph ); assertEquals( result, clone ); }
/**
 * Emits one result tuple per regex match found in the first argument.
 * A null argument is coerced to the empty string, which produces no output
 * unless the pattern matches empty input.
 */
@Override
public void operate( FlowProcess flowProcess, FunctionCall<Pair<Matcher, TupleEntry>> functionCall )
  {
  String value = functionCall.getArguments().getString( 0 );

  // make null safe
  if( value == null )
    value = "";

  // reuse the compiled matcher held in the operation context
  Matcher matcher = functionCall.getContext().getLhs().reset( value );

  TupleEntry output = functionCall.getContext().getRhs();

  while( matcher.find() )
    {
    output.setString( 0, matcher.group() );
    functionCall.getOutputCollector().add( output );
    }
  }
  }
/**
 * Searches the allowed hash-count range for the number of hashes yielding the
 * lowest false positive rate given a bloom filter size and element count.
 *
 * @param numBloomBits total bits available to the bloom filter
 * @param numElems     expected number of distinct elements
 * @param minHashes    smallest hash count to consider (inclusive)
 * @param maxHashes    largest hash count to consider (inclusive)
 * @return pair of (best false positive rate, hash count achieving it)
 * @throws IllegalArgumentException if maxHashes is smaller than minHashes
 */
public static Pair<Double, Integer> getOptimalFalsePositiveRateAndNumHashes(long numBloomBits, long numElems, int minHashes, int maxHashes) {
  if (maxHashes < minHashes) {
    throw new IllegalArgumentException("Cannot have max # hashes smaller than min # hashes! " + maxHashes + " vs " + minHashes);
  }

  // seed the search with the top of the range, then scan downward
  double lowestRate = getFalsePositiveRate(maxHashes, numBloomBits, numElems);
  int hashesAtLowest = maxHashes;

  for (int numHashes = maxHashes - 1; numHashes >= minHashes; numHashes--) {
    double candidate = getFalsePositiveRate(numHashes, numBloomBits, numElems);

    if (candidate < lowestRate) {
      lowestRate = candidate;
      hashesAtLowest = numHashes;
    }
  }

  return new Pair<Double, Integer>(lowestRate, hashesAtLowest);
}
/**
 * Formats the epoch-millis timestamp in the first argument using the date
 * format held in the operation context, then emits the formatted string.
 */
@Override
public void operate( FlowProcess flowProcess, FunctionCall<Pair<SimpleDateFormat, TupleEntry>> functionCall )
  {
  long timestamp = functionCall.getArguments().getLong( 0 );

  Calendar calendar = getCalendar();

  calendar.setTimeInMillis( timestamp );

  TupleEntry output = functionCall.getContext().getRhs();

  output.setString( 0, functionCall.getContext().getLhs().format( calendar.getTime() ) );

  functionCall.getOutputCollector().add( output );
  }
  }
/**
 * Allocates the aggregation context: a one-slot sum seeded with null (so an
 * all-null grouping can be distinguished from a zero sum) and a single-element
 * result tuple.
 */
@Override
public void prepare( FlowProcess flowProcess, OperationCall<Pair<Double[], Tuple>> operationCall )
  {
  Double[] sum = new Double[]{null};

  operationCall.setContext( new Pair<Double[], Tuple>( sum, Tuple.size( 1 ) ) );
  }
public static ElementSubGraph asSubGraph( ElementGraph elementGraph, ElementGraph contractedGraph, Set<FlowElement> excludes ) { elementGraph = asExtentMaskedSubGraph( elementGraph ); // returns same instance if not bounded Pair<Set<FlowElement>, Set<Scope>> pair = findClosureViaFloydWarshall( directed( elementGraph ), directed( contractedGraph ), excludes ); Set<FlowElement> vertices = pair.getLhs(); Set<Scope> excludeEdges = pair.getRhs(); Set<Scope> scopes = new HashSet<>( elementGraph.edgeSet() ); scopes.removeAll( excludeEdges ); return new ElementSubGraph( elementGraph, vertices, scopes ); }
// Builds the per-operation context mapping each resolved argument Fields to a
// (filter, pointer) pair. Explicitly configured pointers are resolved against
// the operation's argument fields; when none are configured, a pointer is
// compiled per argument field from rootPointer + "/" + field name.
// LinkedHashMap preserves the declaration order of the configured pointers.
@Override public void prepare( FlowProcess flowProcess, OperationCall<NestedBaseFunction.Context> operationCall ) { Map<Fields, Pair<Predicate<?>, Pointer<Node>>> resolvedPointers = new LinkedHashMap<>(); Fields argumentFields = operationCall.getArgumentFields(); for( Map.Entry<Fields, Pointer<Node>> entry : this.pointers.entrySet() ) resolvedPointers.put( argumentFields.select( entry.getKey() ), new Pair<>( defaultValueFilter, entry.getValue() ) ); if( resolvedPointers.isEmpty() ) // use resolved argument fields { NestedPointerCompiler<Node, Result> compiler = getNestedPointerCompiler(); for( Iterator<Fields> iterator = argumentFields.fieldsIterator(); iterator.hasNext(); ) { Fields argument = iterator.next(); Pointer<Node> pointer = compiler.compile( rootPointer + "/" + argument.get( 0 ).toString() ); resolvedPointers.put( argument, new Pair<>( defaultValueFilter, pointer ) ); } } operationCall.setContext( new Context( resolvedPointers, Tuple.size( 1 ) ) ); }
public static BloomFilter createAndConsumeFilter(String bloomPartsDir, int maxHashes, int minHashes, long bloomFilterBits, int numSplits, double hllError, String bloomKeyCountsDir, HashFunctionFactory hashFactory, FilterConsumer filterConsumer) throws IOException, CardinalityMergeException { LOG.info("Bloom filter parts located in: " + bloomPartsDir); // This is the side bucket that the HyperLogLog writes to LOG.info("Getting key counts from: " + bloomKeyCountsDir); long prevJobTuples = getApproxDistinctKeysCount(hllError, bloomKeyCountsDir); Pair<Double, Integer> optimal = getOptimalFalsePositiveRateAndNumHashes(bloomFilterBits, prevJobTuples, minHashes, maxHashes); LOG.info("Counted about " + prevJobTuples + " distinct keys"); LOG.info("Using " + bloomFilterBits + " bits in the bloom filter"); LOG.info("Found a false positive rate of: " + optimal.getLhs()); LOG.info("Will use " + optimal.getRhs() + " bloom hashes"); long splitSize = getSplitSize(bloomFilterBits, numSplits); int numBloomHashes = optimal.getRhs(); BloomFilter filter; synchronized (BF_LOAD_LOCK) { // Load bloom filter parts and merge them. String path = bloomPartsDir + "/" + numBloomHashes; filter = mergeBloomParts(path, bloomFilterBits, splitSize, numBloomHashes, prevJobTuples, hashFactory); filterConsumer.accept(filter); } return filter; }
/**
 * Creates the operation context: a reusable matcher compiled from the
 * configured pattern and a single-field result entry sized to the declared
 * fields.
 */
@Override
public void prepare( FlowProcess flowProcess, OperationCall<Pair<Matcher, TupleEntry>> operationCall )
  {
  Matcher matcher = getPattern().matcher( "" );

  TupleEntry output = new TupleEntry( operationCall.getDeclaredFields(), Tuple.size( 1 ) );

  operationCall.setContext( new Pair<>( matcher, output ) );
  }
@Override public void operate( FlowProcess flowProcess, FunctionCall<Pair<Matcher, TupleEntry>> functionCall ) { // coerce to string String value = functionCall.getArguments().getString( 0 ); // make safe if( value == null ) value = ""; TupleEntry output = functionCall.getContext().getRhs(); Matcher matcher = functionCall.getContext().getLhs().reset( value ); if( replaceAll ) output.setString( 0, matcher.replaceAll( replacement ) ); else output.setString( 0, matcher.replaceFirst( replacement ) ); functionCall.getOutputCollector().add( output ); }
/**
 * Builds the operation context holding a matcher pre-compiled against the
 * empty string (reset per input later) and a one-field output entry.
 */
@Override
public void prepare( FlowProcess flowProcess, OperationCall<Pair<Matcher, TupleEntry>> operationCall )
  {
  TupleEntry output = new TupleEntry( operationCall.getDeclaredFields(), Tuple.size( 1 ) );

  operationCall.setContext( new Pair<>( getPattern().matcher( "" ), output ) );
  }