/**
 * Returns a sink that puts {@code Map.Entry}s it receives into the given
 * Hazelcast {@code IMap}.
 * <p>
 * <strong>NOTE:</strong> Jet only remembers the name of the map you supply
 * and acquires a map with that name on the local cluster. If you supply a
 * map instance from another cluster, no error will be thrown to indicate
 * this.
 * <p>
 * This sink provides the exactly-once guarantee thanks to <i>idempotent
 * updates</i>: a value with an existing key is not appended but
 * overwritten, so after the job restarts from a snapshot, duplicate items
 * do not change the state of the target map.
 * <p>
 * The default local parallelism for this sink is 1.
 */
@Nonnull
public static <K, V> Sink<Entry<K, V>> map(@Nonnull IMap<? super K, ? super V> map) {
    return map(map.getName());
}
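A minimal usage sketch of this sink: any stage that emits {@code Map.Entry} items can drain into it. The map name "word-lengths" and the source list "words" are invented for illustration, not taken from any sample.

// Hypothetical usage of Sinks.map(IMap): build Map.Entry items and drain
// them into a local IMap. The names "words" and "word-lengths" are made up.
JetInstance jet = Jet.newJetInstance();
IMap<String, Integer> lengths = jet.getMap("word-lengths");

Pipeline p = Pipeline.create();
p.drawFrom(Sources.<String>list("words"))   // source list assumed pre-populated
 .map(w -> Util.entry(w, w.length()))       // the sink consumes Map.Entry items
 .drainTo(Sinks.map(lengths));              // only the map's name is captured

jet.newJob(p).join();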
public static Pipeline build(String bootstrapServers) {
    Properties properties = new Properties();
    properties.put(ConsumerConfig.GROUP_ID_CONFIG, UUID.randomUUID().toString());
    properties.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers);
    properties.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getCanonicalName());
    properties.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getCanonicalName());
    properties.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");

    Pipeline pipeline = Pipeline.create();
    pipeline
            .drawFrom(KafkaSources.kafka(properties, Constants.TOPIC_NAME_PRECIOUS))
            .drainTo(Sinks.map(Constants.IMAP_NAME_PRECIOUS));
    return pipeline;
}
public static Pipeline buildPipeline(String sourceName, String sinkName) {
    Pattern pattern = Pattern.compile("\\W+");
    Pipeline pipeline = Pipeline.create();
    pipeline.drawFrom(Sources.<Integer, String>map(sourceName))
            .flatMap(e -> Traversers.traverseArray(pattern.split(e.getValue().toLowerCase()))
                                    .filter(w -> !w.isEmpty()))
            .groupingKey(wholeItem())
            .aggregate(counting())
            .drainTo(Sinks.map(sinkName));
    return pipeline;
}
private static Pipeline buildPipeline() {
    Pattern delimiter = Pattern.compile("\\W+");
    Pipeline p = Pipeline.create();
    p.drawFrom(Sources.<Long, String>map(BOOK_LINES))
     .flatMap(e -> traverseArray(delimiter.split(e.getValue().toLowerCase())))
     .filter(word -> !word.isEmpty())
     .groupingKey(wholeItem())
     .aggregate(counting())
     .drainTo(Sinks.map(COUNTS));
    return p;
}
/**
 * Takes the contents of the source map and writes them into the sink map.
 */
private static Pipeline mapSourceAndSink(String sourceMapName, String sinkMapName) {
    Pipeline pipeline = Pipeline.create();
    pipeline.drawFrom(Sources.map(sourceMapName))
            .drainTo(Sinks.map(sinkMapName));
    return pipeline;
}
private static Pipeline buildPipeline(String connectionUrl) {
    Pipeline p = Pipeline.create();
    p.drawFrom(Sources.jdbc(connectionUrl, "SELECT * FROM " + TABLE_NAME,
            resultSet -> new User(resultSet.getInt(1), resultSet.getString(2))))
     .map(user -> Util.entry(user.getId(), user))
     .drainTo(Sinks.map(MAP_NAME));
    return p;
}
/**
 * Builds and returns the Pipeline which represents the actual computation.
 * To compute the probability of finding word B after A, one has to know
 * how many pairs contain word A as a first entry and how many of them
 * contain B as a second entry. The pipeline creates pairs from consecutive
 * words and computes the probabilities of A->B.
 */
private static Pipeline buildPipeline() {
    Pipeline p = Pipeline.create();
    // Reads files line-by-line
    BatchStage<String> lines = p.drawFrom(Sources.<String>files(INPUT_FILE));
    Pattern twoWords = Pattern.compile("(\\.|\\w+)\\s(\\.|\\w+)");
    // Calculates probabilities by flatmapping lines into two-word consecutive
    // pairs using regular expressions and aggregates them into an IMap.
    lines.flatMap(e -> traverseMatcher(twoWords.matcher(e.toLowerCase()),
                 m -> tuple2(m.group(1), m.group(2))))
         .groupingKey(Tuple2::f0)
         .aggregate(buildAggregateOp())
         .drainTo(Sinks.map("stateTransitions"));
    return p;
}
private Pipeline buildPipeline() {
    Pipeline p = Pipeline.create();
    p.drawFrom(KafkaSources.kafka(brokerProperties(), "t1", "t2"))
     .drainTo(Sinks.map(SINK_NAME));
    return p;
}
/**
 * Helper method to construct the pipeline for the job
 *
 * @return the pipeline for the job
 */
public static Pipeline buildPipeline() {
    final Pipeline p = Pipeline.create();

    // Compute map server side
    final BatchStage<Horse> c = p.drawFrom(Sources.map(EVENTS_BY_NAME, t -> true, HORSE_FROM_EVENT));

    final BatchStage<Entry<Horse, Long>> c2 = c.groupingKey(wholeItem())
                                               .aggregate(counting())
                                               .filter(ent -> ent.getValue() > 1);

    c2.drainTo(Sinks.map(MULTIPLE));
    return p;
}
private static Pipeline buildPipeline() {
    Pipeline p = Pipeline.create();
    p.drawFrom(Sources.<Trade, Integer, Trade>mapJournal(TRADES_MAP_NAME,
            DistributedPredicate.alwaysTrue(), EventJournalMapEvent::getNewValue, START_FROM_CURRENT))
     .groupingKey(Trade::getTicker)
     .rollingAggregate(summingLong(Trade::getPrice))
     .drainTo(Sinks.map(VOLUME_MAP_NAME));
    return p;
}
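This snippet, and the similar one two snippets below, read from a map's event journal, which must be enabled for the source map before the cluster starts or the source has nothing to read. The samples' configuration is not shown here; a minimal sketch, assuming the Jet 0.x / Hazelcast IMDG 3.x APIs these snippets use, could look like this (the capacity value is arbitrary):

// Sketch: enable the event journal for the trades map before starting Jet.
JetConfig config = new JetConfig();
config.getHazelcastConfig().addEventJournalConfig(
        new EventJournalConfig()
                .setMapName(TRADES_MAP_NAME)   // same constant as in the pipeline above
                .setEnabled(true)
                .setCapacity(100_000));        // arbitrary; size to the expected event rate
JetInstance jet = Jet.newJetInstance(config);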
(tfVal, logDocCountVal) -> toInvertedIndexEntry(
        logDocCountVal, tfVal.getKey(), tfVal.getValue().entrySet()))
.drainTo(Sinks.map(INVERTED_INDEX));
private static Pipeline buildPipeline() {
    Pipeline p = Pipeline.create();
    p.drawFrom(Sources.<Entry<String, Integer>, Integer, Entry<String, Integer>>mapJournal(TRADES_MAP_NAME,
            DistributedPredicate.alwaysTrue(), EventJournalMapEvent::getNewValue, START_FROM_CURRENT))
     .groupingKey(Entry::getKey)
     .rollingAggregate(summingLong(Entry::getValue))
     .drainTo(Sinks.map(VOLUME_MAP_NAME));
    return p;
}
/**
 * Helper method to construct the pipeline for the job
 *
 * @return the pipeline for the real-time analysis
 */
public static Pipeline buildPipeline() {
    final Pipeline pipeline = Pipeline.create();

    // Draw users from the Hazelcast IMDG source
    BatchStage<User> users = pipeline.drawFrom(Sources.<User, Long, User>map(USER_ID, e -> true, Entry::getValue));

    // All bet legs which are single
    BatchStage<Tuple3<Race, Horse, Bet>> bets = users.flatMap(user -> traverseStream(
            user.getKnownBets().stream()
                .filter(Bet::single)
                .flatMap(bet -> bet.getLegs().stream().map(leg -> tuple3(leg.getRace(), leg.getBacking(), bet)))
    ));

    // Find for each race the projected loss if each horse was to win
    BatchStage<Entry<Race, Map<Horse, Double>>> betsByRace = bets.groupingKey(Tuple3::f0).aggregate(
            AggregateOperations.toMap(
                    Tuple3::f1,
                    t -> t.f2().projectedPayout(t.f1()), // payout if backed horse was to win
                    (l, r) -> l + r
            )
    );

    // Write out: (r : (h : losses))
    betsByRace.drainTo(Sinks.map(WORST_ID));
    return pipeline;
}
private static Pipeline buildPipeline() {
    Pipeline p = Pipeline.create();
    p.drawFrom(AvroSources.filesBuilder(AvroSink.DIRECTORY_NAME, ReflectDatumReader<User>::new)
                          // Both Jet members share the same local file system
                          .sharedFileSystem(true)
                          .build())
     .map(user -> Util.entry(user.getUsername(), user))
     .drainTo(Sinks.map(AvroSink.MAP_NAME));
    return p;
}
public static void main(String[] args) throws Exception {
    System.setProperty("hazelcast.logging.type", "log4j");

    JetInstance localJet = Jet.newJetInstance();
    try {
        HazelcastInstance externalHz = startExternalHazelcast();

        IMap<Integer, Integer> sourceMap = externalHz.getMap(MAP_1);
        for (int i = 0; i < ITEM_COUNT; i++) {
            sourceMap.put(i, i);
        }

        ClientConfig clientConfig = clientConfigForExternalHazelcast();

        // pipeline that copies the remote map to a local one with the same name
        Pipeline p1 = Pipeline.create();
        p1.drawFrom(Sources.remoteMap(MAP_1, clientConfig))
          .drainTo(Sinks.map(MAP_1));

        // pipeline that copies the local map to a remote one with a different name
        Pipeline p2 = Pipeline.create();
        p2.drawFrom(Sources.map(MAP_1))
          .drainTo(Sinks.remoteMap(MAP_2, clientConfig));

        localJet.newJob(p1).join();
        System.out.println("Local map-1 contents: " + localJet.getMap(MAP_1).entrySet());

        localJet.newJob(p2).join();
        System.out.println("Remote map-2 contents: " + externalHz.getMap(MAP_2).entrySet());
    } finally {
        Jet.shutdownAll();
        Hazelcast.shutdownAll();
    }
}
/**
 * This code is the main point of the sample: use the source builder to
 * create an HTTP source connector, then create a Jet pipeline that
 * performs windowed aggregation over its data.
 */
private static Pipeline buildPipeline() {
    StreamSource<TimestampedItem<Long>> usedMemorySource = SourceBuilder
            .timestampedStream("used-memory", x -> new PollHttp())
            .fillBufferFn(PollHttp::fillBuffer)
            .destroyFn(PollHttp::close)
            .build();
    Pipeline p = Pipeline.create();
    p.drawFrom(usedMemorySource)
     .window(sliding(100, 20))
     .aggregate(linearTrend(TimestampedItem::timestamp, TimestampedItem::item))
     .map(tsItem -> entry(tsItem.timestamp(), tsItem.item()))
     .drainTo(Sinks.map(MAP_NAME));
    return p;
}
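The PollHttp class is referenced above but not shown. A sketch of what such a connector might look like, assuming it polls an HTTP endpoint that returns a single number per request; the URL, the response format, and the use of java.net are assumptions, not the sample's actual code:

// Hypothetical polling connector matching the source-builder calls above.
// The endpoint URL and single-number response format are invented here.
static final class PollHttp {
    private final URL url;

    PollHttp() throws Exception {
        this.url = new URL("http://localhost:8008/used-memory"); // assumed endpoint
    }

    // Called repeatedly by Jet; adds at most one timestamped item per call
    void fillBuffer(SourceBuilder.TimestampedSourceBuffer<TimestampedItem<Long>> buf) throws Exception {
        try (BufferedReader in = new BufferedReader(
                new InputStreamReader(url.openStream(), StandardCharsets.UTF_8))) {
            long ts = System.currentTimeMillis();
            buf.add(new TimestampedItem<>(ts, Long.parseLong(in.readLine().trim())), ts);
        }
    }

    void close() {
        // nothing held open in this sketch; a real connector would
        // release its HTTP client here
    }
}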
@SuppressWarnings("Convert2MethodRef") // https://bugs.openjdk.java.net/browse/JDK-8154236 private static Pipeline coGroupDirect() { Pipeline p = Pipeline.create(); // Create three source streams BatchStageWithKey<PageVisit, Integer> pageVisits = p.drawFrom(Sources.<PageVisit>list(PAGE_VISIT)) .groupingKey(pageVisit -> pageVisit.userId()); BatchStageWithKey<AddToCart, Integer> addToCarts = p.drawFrom(Sources.<AddToCart>list(ADD_TO_CART)) .groupingKey(addToCart -> addToCart.userId()); BatchStageWithKey<Payment, Integer> payments = p.drawFrom(Sources.<Payment>list(PAYMENT)) .groupingKey(payment -> payment.userId()); // Construct the co-group transform. The aggregate operation collects all // the stream items into a 3-tuple of lists. BatchStage<Entry<Integer, Tuple3<List<PageVisit>, List<AddToCart>, List<Payment>>>> coGrouped = pageVisits.aggregate3(toList(), addToCarts, toList(), payments, toList()); // Store the results in the output map coGrouped.drainTo(Sinks.map(RESULT)); return p; }
@SuppressWarnings("Convert2MethodRef") // https://bugs.openjdk.java.net/browse/JDK-8154236 private static Pipeline coGroupDirect() { Pipeline p = Pipeline.create(); // Create three source streams BatchStageWithKey<PageVisit, Integer> pageVisits = p.drawFrom(Sources.<PageVisit>list(PAGE_VISIT)) .groupingKey(pageVisit -> pageVisit.userId()); BatchStageWithKey<AddToCart, Integer> addToCarts = p.drawFrom(Sources.<AddToCart>list(ADD_TO_CART)) .groupingKey(addToCart -> addToCart.userId()); BatchStageWithKey<Payment, Integer> payments = p.drawFrom(Sources.<Payment>list(PAYMENT)) .groupingKey(payment -> payment.userId()); // Construct the co-group transform. The aggregate operation collects all // the stream items into a 3-tuple of lists. BatchStage<Entry<Integer, Tuple3<List<PageVisit>, List<AddToCart>, List<Payment>>>> coGrouped = pageVisits.aggregate3(toList(), addToCarts, toList(), payments, toList()); // Store the results in the output map coGrouped.drainTo(Sinks.map(RESULT)); return p; }
@SuppressWarnings("Convert2MethodRef") // https://bugs.openjdk.java.net/browse/JDK-8154236 private static Pipeline coGroupBuild() { Pipeline p = Pipeline.create(); // Create three source streams BatchStageWithKey<PageVisit, Integer> pageVisits = p.drawFrom(Sources.<PageVisit>list(PAGE_VISIT)) .groupingKey(pageVisit -> pageVisit.userId()); BatchStageWithKey<AddToCart, Integer> addToCarts = p.drawFrom(Sources.<AddToCart>list(ADD_TO_CART)) .groupingKey(addToCart -> addToCart.userId()); BatchStageWithKey<Payment, Integer> payments = p.drawFrom(Sources.<Payment>list(PAYMENT)) .groupingKey(payment -> payment.userId()); // Obtain a builder object for the co-group transform GroupAggregateBuilder<Integer, List<PageVisit>> builder = pageVisits.aggregateBuilder(toList()); Tag<List<PageVisit>> visitTag = builder.tag0(); // Add the co-grouped streams to the builder. Here we add just two, but // you could add any number of them. Tag<List<AddToCart>> cartTag = builder.add(addToCarts, toList()); Tag<List<Payment>> payTag = builder.add(payments, toList()); // Build the co-group transform. The aggregate operation collects all the // stream items into an accumulator class called ItemsByTag. We transform // it into a 3-tuple of lists. BatchStage<Entry<Integer, Tuple3<List<PageVisit>, List<AddToCart>, List<Payment>>>> coGrouped = builder.build((key, res) -> entry(key, tuple3(res.get(visitTag), res.get(cartTag), res.get(payTag)))); // Store the results in the output map coGrouped.drainTo(Sinks.map(RESULT)); return p; }
@SuppressWarnings("Convert2MethodRef") // https://bugs.openjdk.java.net/browse/JDK-8154236 private static Pipeline coGroupBuild() { Pipeline p = Pipeline.create(); // Create three source streams BatchStageWithKey<PageVisit, Integer> pageVisits = p.drawFrom(Sources.<PageVisit>list(PAGE_VISIT)) .groupingKey(pageVisit -> pageVisit.userId()); BatchStageWithKey<AddToCart, Integer> addToCarts = p.drawFrom(Sources.<AddToCart>list(ADD_TO_CART)) .groupingKey(addToCart -> addToCart.userId()); BatchStageWithKey<Payment, Integer> payments = p.drawFrom(Sources.<Payment>list(PAYMENT)) .groupingKey(payment -> payment.userId()); // Obtain a builder object for the co-group transform GroupAggregateBuilder<Integer, List<PageVisit>> builder = pageVisits.aggregateBuilder(toList()); Tag<List<PageVisit>> visitTag = builder.tag0(); // Add the co-grouped streams to the builder. Here we add just two, but // you could add any number of them. Tag<List<AddToCart>> cartTag = builder.add(addToCarts, toList()); Tag<List<Payment>> payTag = builder.add(payments, toList()); // Build the co-group transform. The aggregate operation collects all the // stream items into an accumulator class called ItemsByTag. We transform // it into a 3-tuple of lists. BatchStage<Entry<Integer, Tuple3<List<PageVisit>, List<AddToCart>, List<Payment>>>> coGrouped = builder.build((key, res) -> entry(key, tuple3(res.get(visitTag), res.get(cartTag), res.get(payTag)))); // Store the results in the output map coGrouped.drainTo(Sinks.map(RESULT)); return p; }