/**
 * Shorthand for {@link #mapJournal(String, DistributedPredicate,
 * DistributedFunction, JournalInitialPosition)} that keeps only
 * {@link EntryEventType#ADDED ADDED} and
 * {@link EntryEventType#UPDATED UPDATED} events and projects each
 * event's key and new value into a {@code Map.Entry}.
 */
@Nonnull
public static <K, V> StreamSource<Entry<K, V>> mapJournal(
        @Nonnull String mapName,
        @Nonnull JournalInitialPosition initialPos
) {
    return mapJournal(mapName, mapPutEvents(), mapEventToEntry(), initialPos);
}
/**
 * Variant of {@link #map(IMap, Predicate, Projection)} that takes a
 * {@link DistributedFunction} as the projection instead of a
 * {@code Projection}.
 * <p>
 * <strong>NOTE:</strong> only the map's name is captured by Jet; a map
 * with that name is acquired on the local cluster. Supplying a map
 * instance that belongs to a different cluster is not detected and
 * produces no error.
 */
@Nonnull
public static <T, K, V> BatchSource<T> map(
        @Nonnull IMap<? extends K, ? extends V> map,
        @Nonnull Predicate<? super K, ? super V> predicate,
        @Nonnull DistributedFunction<? super Map.Entry<K, V>, ? extends T> projectionFn
) {
    return map(map.getName(), predicate, projectionFn);
}
/**
 * Variant of {@link #map(String, Predicate, Projection)} that takes a
 * {@link DistributedFunction} as the projection instead of a
 * {@code Projection}.
 */
@Nonnull
public static <T, K, V> BatchSource<T> map(
        @Nonnull String mapName,
        @Nonnull Predicate<? super K, ? super V> predicate,
        @Nonnull DistributedFunction<? super Map.Entry<K, V>, ? extends T> projectionFn
) {
    return batchFromProcessor("mapSource(" + mapName + ')',
            readMapP(mapName, predicate, projectionFn));
}
p.drawFrom(Sources.<Object, Trade>mapJournal(TRADES, START_FROM_CURRENT)) .map(entryValue()); BatchSource<Map.Entry<Integer, Product>> products = Sources.filesBuilder(resourcesPath) .sharedFileSystem(true) .glob("products.txt") }); BatchSource<Map.Entry<Integer, Broker>> brokers = Sources.filesBuilder(resourcesPath) .sharedFileSystem(true) .glob("brokers.txt")
public static void main(String[] args) throws Exception { System.setProperty("hazelcast.logging.type", "log4j"); JetInstance localJet = Jet.newJetInstance(); try { HazelcastInstance externalHz = startExternalHazelcast(); IMap<Integer, Integer> sourceMap = externalHz.getMap(MAP_1); for (int i = 0; i < ITEM_COUNT; i++) { sourceMap.put(i, i); } ClientConfig clientConfig = clientConfigForExternalHazelcast(); // pipeline that copies the remote map to a local with the same name Pipeline p1 = Pipeline.create(); p1.drawFrom(Sources.remoteMap(MAP_1, clientConfig)) .drainTo(Sinks.map(MAP_1)); // pipeline that copies the local map to a remote with different name Pipeline p2 = Pipeline.create(); p2.drawFrom(Sources.map(MAP_1)) .drainTo(Sinks.remoteMap(MAP_2, clientConfig)); localJet.newJob(p1).join(); System.out.println("Local map-1 contents: " + localJet.getMap(MAP_1).entrySet()); localJet.newJob(p2).join(); System.out.println("Remote map-2 contents: " + externalHz.getMap(MAP_2).entrySet()); } finally { Jet.shutdownAll(); Hazelcast.shutdownAll(); } }
/**
 * Returns a batch source that emits the items of a Hazelcast {@code
 * IList}. Because the IMDG keeps the entire list on one member, every
 * item is emitted on that single member.
 * <p>
 * If the {@code IList} is modified while it is being read, the source
 * may skip and/or duplicate some entries.
 * <p>
 * <strong>NOTE:</strong> only the list's name is captured by Jet; a list
 * with that name is acquired on the local cluster. Supplying a list
 * instance that belongs to a different cluster is not detected and
 * produces no error.
 * <p>
 * The source saves no state to the snapshot; after a job restart it
 * re-emits all entries.
 * <p>
 * The default local parallelism for this processor is 1.
 */
@Nonnull
public static <T> BatchSource<T> list(@Nonnull IList<? extends T> list) {
    return list(list.getName());
}
/**
 * Batch source that reads every file in the given directory.
 * <p>
 * Equivalent to: <pre>{@code
 * filesBuilder(directory)
 *     .charset(UTF_8)
 *     .glob(GLOB_WILDCARD)
 *     .sharedFileSystem(false)
 *     .mapToOutputFn((fileName, line) -> line)
 *     .build()
 * }</pre>
 * <p>
 * If a file is appended to while being read, the appended data may or
 * may not be emitted, and a partial line can be emitted. Behavior is
 * undefined for more complex concurrent modifications.
 *
 * See {@link #filesBuilder(String)}.
 */
@Nonnull
public static BatchSource<String> files(@Nonnull String directory) {
    return filesBuilder(directory).build();
}
public static void main(String[] args) throws Exception { System.setProperty("hazelcast.logging.type", "log4j"); JetInstance localJet = Jet.newJetInstance(); try { HazelcastInstance externalHz = startExternalHazelcast(); IList<Integer> sourceList = externalHz.getList(LIST_1); for (int i = 0; i < ITEM_COUNT; i++) { sourceList.add(i); } ClientConfig clientConfig = clientConfigForExternalHazelcast(); // pipeline that copies the remote list to a local with the same name Pipeline p1 = Pipeline.create(); p1.drawFrom(Sources.remoteList(LIST_1, clientConfig)) .drainTo(Sinks.list(LIST_1)); // pipeline that copies the local list to a remote with a different name Pipeline p2 = Pipeline.create(); p2.drawFrom(Sources.list(LIST_1)) .drainTo(Sinks.remoteList(LIST_2, clientConfig)); localJet.newJob(p1).join(); System.out.println("Local list-1 contents: " + new ArrayList<>(localJet.getList(LIST_1))); localJet.newJob(p2).join(); System.out.println("Remote list-2 contents: " + new ArrayList<>(externalHz.getList(LIST_2))); } finally { Jet.shutdownAll(); Hazelcast.shutdownAll(); } }
/**
 * Builds the pipeline: reads the book files line by line, keeps only the
 * lines starting with "The " and drains them to the topic sink.
 */
private static Pipeline buildPipeline() {
    Pipeline pipeline = Pipeline.create();
    pipeline.drawFrom(Sources.files(getBooksPath()))
            .filter(text -> text.startsWith("The "))
            .drainTo(buildTopicSink());
    return pipeline;
}
/**
 * Fills a local {@code IList} with integers, runs a job that maps each
 * integer to a string, and prints the resulting list.
 */
public static void main(String[] args) throws Exception {
    System.setProperty("hazelcast.logging.type", "log4j");
    JetInstance jet = Jet.newJetInstance();
    try {
        // Seed the input list
        IList<Integer> source = jet.getList(INPUT_LIST);
        for (int i = 0; i < ITEM_COUNT; i++) {
            source.add(i);
        }

        // Map each integer to a string and drain to the result list
        Pipeline pipeline = Pipeline.create();
        pipeline.drawFrom(Sources.<Integer>list(INPUT_LIST))
                .map(i -> "item" + i)
                .drainTo(Sinks.list(RESULT_LIST));
        jet.newJob(pipeline).join();

        IList<String> sink = jet.getList(RESULT_LIST);
        System.out.println("Result list items: " + new ArrayList<>(sink));
    } finally {
        Jet.shutdownAll();
    }
}
}
/**
 * Streaming source that watches a directory and emits lines as they are
 * appended to files inside it.
 * <p>
 * Equivalent to: <pre>{@code
 * filesBuilder(directory)
 *     .charset(UTF_8)
 *     .glob(GLOB_WILDCARD)
 *     .sharedFileSystem(false)
 *     .mapToOutputFn((fileName, line) -> line)
 *     .buildWatcher()
 * }</pre>
 *
 * See {@link #filesBuilder(String)}.
 */
@Nonnull
public static StreamSource<String> fileWatcher(@Nonnull String watchedDirectory) {
    return filesBuilder(watchedDirectory).buildWatcher();
}
/** * Builds and returns the Pipeline which represents the actual computation. * To compute the probability of finding word B after A, one has to know * how many pairs contain word A as a first entry and how many of them * contain B as a second entry. The pipeline creates pairs from consecutive * words and computes the probabilities of A->B. */ private static Pipeline buildPipeline() { Pipeline p = Pipeline.create(); // Reads files line-by-line BatchStage<String> lines = p.drawFrom(Sources.<String>files(INPUT_FILE)); Pattern twoWords = Pattern.compile("(\\.|\\w+)\\s(\\.|\\w+)"); // Calculates probabilities by flatmapping lines into two-word consecutive pairs using regular expressions // and aggregates them into an IMap. lines.flatMap(e -> traverseMatcher(twoWords.matcher(e.toLowerCase()), m -> tuple2(m.group(1), m.group(2)))) .groupingKey(Tuple2::f0) .aggregate(buildAggregateOp()) .drainTo(Sinks.map("stateTransitions")); return p; }
/**
 * Shorthand for {@link #mapJournal(IMap, DistributedPredicate,
 * DistributedFunction, JournalInitialPosition)} that keeps only
 * {@link EntryEventType#ADDED ADDED} and
 * {@link EntryEventType#UPDATED UPDATED} events and projects each
 * event's key and new value into a {@code Map.Entry}.
 * <p>
 * <strong>NOTE:</strong> only the map's name is captured by Jet; a map
 * with that name is acquired on the local cluster. Supplying a map
 * instance that belongs to a different cluster is not detected and
 * produces no error.
 */
@Nonnull
public static <K, V> StreamSource<Entry<K, V>> mapJournal(
        @Nonnull IMap<? extends K, ? extends V> map,
        @Nonnull JournalInitialPosition initialPos
) {
    return mapJournal(map.getName(), mapPutEvents(), mapEventToEntry(), initialPos);
}
/**
 * Returns a batch source that fetches the entries of the given Hazelcast
 * {@code IMap} and emits them as {@code Map.Entry}. Data locality is
 * exploited: each underlying processor fetches only the entries stored
 * on the member it runs on.
 * <p>
 * <strong>NOTE:</strong> only the map's name is captured by Jet; a map
 * with that name is acquired on the local cluster. Supplying a map
 * instance that belongs to a different cluster is not detected and
 * produces no error.
 * <p>
 * The source saves no state to the snapshot; after a job restart it
 * re-emits all entries.
 * <p>
 * If the {@code IMap} is modified while being read, or a cluster
 * topology change triggers data migration, the source may skip and/or
 * duplicate some entries.
 * <p>
 * The default local parallelism for this processor is 2 (or 1 if just 1
 * CPU is available).
 */
@Nonnull
public static <K, V> BatchSource<Entry<K, V>> map(@Nonnull IMap<? extends K, ? extends V> map) {
    return map(map.getName());
}
/**
 * Convenience for {@link #remoteMap(String, ClientConfig, Predicate, Projection)}
 * which uses a {@link DistributedFunction} as the projection function.
 */
@Nonnull
public static <T, K, V> BatchSource<T> remoteMap(
        @Nonnull String mapName,
        @Nonnull ClientConfig clientConfig,
        @Nonnull Predicate<? super K, ? super V> predicate,
        @Nonnull DistributedFunction<? super Entry<K, V>, ? extends T> projectionFn
) {
    return batchFromProcessor("remoteMapSource(" + mapName + ')',
            readRemoteMapP(mapName, clientConfig, predicate, projectionFn));
}
@SuppressWarnings("Convert2MethodRef") // https://bugs.openjdk.java.net/browse/JDK-8154236 private static Pipeline coGroupDirect() { Pipeline p = Pipeline.create(); // Create three source streams BatchStageWithKey<PageVisit, Integer> pageVisits = p.drawFrom(Sources.<PageVisit>list(PAGE_VISIT)) .groupingKey(pageVisit -> pageVisit.userId()); BatchStageWithKey<AddToCart, Integer> addToCarts = p.drawFrom(Sources.<AddToCart>list(ADD_TO_CART)) .groupingKey(addToCart -> addToCart.userId()); BatchStageWithKey<Payment, Integer> payments = p.drawFrom(Sources.<Payment>list(PAYMENT)) .groupingKey(payment -> payment.userId()); // Construct the co-group transform. The aggregate operation collects all // the stream items into a 3-tuple of lists. BatchStage<Entry<Integer, Tuple3<List<PageVisit>, List<AddToCart>, List<Payment>>>> coGrouped = pageVisits.aggregate3(toList(), addToCarts, toList(), payments, toList()); // Store the results in the output map coGrouped.drainTo(Sinks.map(RESULT)); return p; }
filesBuilder(sourceFile.getParent().toString()) .glob(sourceFile.getFileName().toString()) .build((filename, line) -> {
/**
 * Builds the pipeline: parses the access-log files under
 * {@code sourceDir}, keeps requests with a 2xx/3xx response code,
 * explodes each URL into its sub-paths, counts hits per sub-path and
 * writes the counts to {@code targetDir}.
 */
private static Pipeline buildPipeline(String sourceDir, String targetDir) {
    Pipeline pipeline = Pipeline.create();
    pipeline.drawFrom(Sources.files(sourceDir))
            .map(LogLine::parse)
            // keep only successful (non-error, non-redirect-failure) requests
            .filter((LogLine logLine) ->
                    logLine.getResponseCode() >= 200 && logLine.getResponseCode() < 400)
            .flatMap(AccessLogAnalyzer::explodeSubPaths)
            .groupingKey(wholeItem())
            .aggregate(counting())
            .drainTo(Sinks.files(targetDir));
    return pipeline;
}
@Nonnull JournalInitialPosition initialPos ) { return mapJournal(map.getName(), predicateFn, projectionFn, initialPos);
@Nonnull Projection<? super Entry<K, V>, ? extends T> projection ) { return map(map.getName(), predicate, projection);