public static Pipeline buildPipeline(String sourceName, String sinkName) { Pattern pattern = Pattern.compile("\\W+"); Pipeline pipeline = Pipeline.create(); pipeline.drawFrom(Sources.<Integer, String>map(sourceName)) .flatMap(e -> Traversers.traverseArray(pattern.split(e.getValue().toLowerCase())) .filter(w -> !w.isEmpty())) .groupingKey(wholeItem()) .aggregate(counting()) .drainTo(Sinks.map(sinkName)); return pipeline; }
private static Pipeline buildPipeline() { Pattern delimiter = Pattern.compile("\\W+"); Pipeline p = Pipeline.create(); p.drawFrom(Sources.<Long, String>map(BOOK_LINES)) .flatMap(e -> traverseArray(delimiter.split(e.getValue().toLowerCase()))) .filter(word -> !word.isEmpty()) .groupingKey(wholeItem()) .aggregate(counting()) .drainTo(Sinks.map(COUNTS)); return p; }
.flatMap(line -> traverseArray(delimiter.split(line.toLowerCase())).filter(w -> !w.isEmpty())) .groupingKey(wholeItem()) .aggregate(counting())
/** * Builds and returns the Pipeline which represents the actual computation. * To compute the probability of finding word B after A, one has to know * how many pairs contain word A as a first entry and how many of them * contain B as a second entry. The pipeline creates pairs from consecutive * words and computes the probabilities of A->B. */ private static Pipeline buildPipeline() { Pipeline p = Pipeline.create(); // Reads files line-by-line BatchStage<String> lines = p.drawFrom(Sources.<String>files(INPUT_FILE)); Pattern twoWords = Pattern.compile("(\\.|\\w+)\\s(\\.|\\w+)"); // Calculates probabilities by flatmapping lines into two-word consecutive pairs using regular expressions // and aggregates them into an IMap. lines.flatMap(e -> traverseMatcher(twoWords.matcher(e.toLowerCase()), m -> tuple2(m.group(1), m.group(2)))) .groupingKey(Tuple2::f0) .aggregate(buildAggregateOp()) .drainTo(Sinks.map("stateTransitions")); return p; }
/** * Helper method to construct the pipeline for the job * * @return the pipeline for the real-time analysis */ public static Pipeline buildPipeline() { final Pipeline pipeline = Pipeline.create(); // Draw users from the Hazelcast IMDG source BatchStage<User> users = pipeline.drawFrom(Sources.<User, Long, User>map(USER_ID, e -> true, Entry::getValue)); // All bet legs which are single BatchStage<Tuple3<Race, Horse, Bet>> bets = users.flatMap(user -> traverseStream( user.getKnownBets().stream() .filter(Bet::single) .flatMap(bet -> bet.getLegs().stream().map(leg -> tuple3(leg.getRace(), leg.getBacking(), bet))) ) ); // Find for each race the projected loss if each horse was to win BatchStage<Entry<Race, Map<Horse, Double>>> betsByRace = bets.groupingKey(Tuple3::f0).aggregate( AggregateOperations.toMap( Tuple3::f1, t -> t.f2().projectedPayout(t.f1()), // payout if backed horse was to win (l, r) -> l + r ) ); // Write out: (r : (h : losses)) betsByRace.drainTo(Sinks.map(WORST_ID)); return pipeline; }
private static Pipeline buildPipeline(String sourceDir, String targetDir) { Pipeline p = Pipeline.create(); p.drawFrom(Sources.files(sourceDir)) .map(LogLine::parse) .filter((LogLine log) -> log.getResponseCode() >= 200 && log.getResponseCode() < 400) .flatMap(AccessLogAnalyzer::explodeSubPaths) .groupingKey(wholeItem()) .aggregate(counting()) .drainTo(Sinks.files(targetDir)); return p; }