/**
 * Convenience for {@link #map(String, Predicate, Projection)}
 * which uses a {@link DistributedFunction} as the projection function.
 */
@Nonnull
public static <T, K, V> BatchSource<T> map(
        @Nonnull String mapName,
        @Nonnull Predicate<? super K, ? super V> predicate,
        @Nonnull DistributedFunction<? super Map.Entry<K, V>, ? extends T> projectionFn
) {
    return batchFromProcessor("mapSource(" + mapName + ')',
            readMapP(mapName, predicate, projectionFn));
}
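// Usage sketch, not from the source: assumes a hypothetical Person value
// class with a getName() accessor and an indexed "age" attribute;
// Predicates is the IMDG com.hazelcast.query.Predicates helper.
BatchSource<String> names = Sources.<String, String, Person>map(
        "persons",
        Predicates.greaterThan("age", 21),
        entry -> entry.getValue().getName());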
/**
 * Returns a supplier of processors for
 * {@link Sources#mapJournal(String, JournalInitialPosition)}.
 */
@Nonnull
public static <K, V> ProcessorMetaSupplier streamMapP(
        @Nonnull String mapName,
        @Nonnull JournalInitialPosition initialPos,
        @Nonnull EventTimePolicy<? super Entry<K, V>> eventTimePolicy
) {
    return streamMapP(mapName, mapPutEvents(), mapEventToEntry(), initialPos, eventTimePolicy);
}
/**
 * Returns a source that fetches entries from a Hazelcast {@code ICache}
 * with the given name and emits them as {@code Map.Entry}. It leverages
 * data locality by making each of the underlying processors fetch only
 * those entries that are stored on the member where it is running.
 * <p>
 * The source does not save any state to snapshot. If the job is restarted,
 * it will re-emit all entries.
 * <p>
 * If the {@code ICache} is modified while being read, or if there is a
 * cluster topology change (triggering data migration), the source may
 * miss and/or duplicate some entries.
 * <p>
 * The default local parallelism for this processor is 2 (or 1 if just 1
 * CPU is available).
 */
@Nonnull
public static <K, V> BatchSource<Entry<K, V>> cache(@Nonnull String cacheName) {
    return batchFromProcessor("cacheSource(" + cacheName + ')', readCacheP(cacheName));
}
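// Usage sketch (cache name and types are illustrative):
Pipeline p = Pipeline.create();
p.drawFrom(Sources.<String, Integer>cache("inputCache"))
 .drainTo(Sinks.logger());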
/**
 * Convenience for {@link Sources#jdbc(DistributedSupplier,
 * ToResultSetFunction, DistributedFunction)}.
 * A non-distributed, single-worker source which fetches the whole result
 * set with a single query on a single member.
 * <p>
 * This method executes exactly one query in the target database. If the
 * underlying table is modified while being read, the behavior depends on
 * the configured transaction isolation level in the target database. Refer
 * to the documentation for the target database system.
 * <p>
 * Example: <pre>{@code
 * p.drawFrom(Sources.jdbc(
 *     DB_CONNECTION_URL,
 *     "select ID, NAME from PERSON",
 *     resultSet -> new Person(resultSet.getInt(1), resultSet.getString(2))))
 * }</pre>
 */
public static <T> BatchSource<T> jdbc(
        @Nonnull String connectionURL,
        @Nonnull String query,
        @Nonnull DistributedFunction<? super ResultSet, ? extends T> createOutputFn
) {
    return batchFromProcessor("jdbcSource",
            SourceProcessors.readJdbcP(connectionURL, query, createOutputFn));
}
/**
 * Returns a source that emits items retrieved from a Hazelcast {@code
 * IList}. All elements are emitted on a single member, the one where the
 * entire list is stored by the IMDG.
 * <p>
 * If the {@code IList} is modified while being read, the source may miss
 * and/or duplicate some entries.
 * <p>
 * The source does not save any state to snapshot. If the job is restarted,
 * it will re-emit all entries.
 * <p>
 * The default local parallelism for this processor is 1.
 */
@Nonnull
public static <T> BatchSource<T> list(@Nonnull String listName) {
    return batchFromProcessor("listSource(" + listName + ')', readListP(listName));
}
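// Usage sketch (list name is illustrative); all items come from one member:
p.drawFrom(Sources.<Integer>list("inputList"))
 .map(i -> i * 2)
 .drainTo(Sinks.list("outputList"));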
/**
 * Builds a custom file {@link BatchSource} with supplied components and the
 * output function {@code mapOutputFn}.
 * <p>
 * The source does not save any state to snapshot. If the job is restarted,
 * it will re-emit all entries.
 * <p>
 * Any {@code IOException} will cause the job to fail. The files must not
 * change while being read; if they do, the behavior is unspecified.
 * <p>
 * The default local parallelism for this processor is 2 (or 1 if just 1
 * CPU is available).
 *
 * @param mapOutputFn the function which creates an output object from each
 *                    line. Gets the filename and the line as parameters
 * @param <T> the type of the items the source emits
 */
public <T> BatchSource<T> build(DistributedBiFunction<String, String, ? extends T> mapOutputFn) {
    return batchFromProcessor("filesSource(" + new File(directory, glob) + ')',
            SourceProcessors.readFilesP(directory, charset, glob, sharedFileSystem, mapOutputFn));
}
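// Usage sketch, under assumptions: the builder is obtained via
// Sources.filesBuilder(directory) and exposes setters matching the fields
// used above (glob, charset, sharedFileSystem); paths are illustrative.
BatchSource<String> lines = Sources.filesBuilder("/var/log/app")
        .glob("*.log")
        .build((fileName, line) -> fileName + ": " + line);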
) { return batchFromProcessor("jdbcSource", SourceProcessors.readJdbcP(connectionSupplier, resultSetFn, createOutputFn));
/**
 * Returns a source that fetches entries from a local Hazelcast {@code IMap}
 * with the specified name and emits them as {@code Map.Entry}. It leverages
 * data locality by making each of the underlying processors fetch only those
 * entries that are stored on the member where it is running.
 * <p>
 * The source does not save any state to snapshot. If the job is restarted,
 * it will re-emit all entries.
 * <p>
 * If the {@code IMap} is modified while being read, or if there is a
 * cluster topology change (triggering data migration), the source may
 * miss and/or duplicate some entries.
 * <p>
 * The default local parallelism for this processor is 2 (or 1 if just 1
 * CPU is available).
 */
@Nonnull
public static <K, V> BatchSource<Entry<K, V>> map(@Nonnull String mapName) {
    return batchFromProcessor("mapSource(" + mapName + ')', readMapP(mapName));
}
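// Usage sketch (map names are illustrative); the source emits Map.Entry items:
p.drawFrom(Sources.<String, Integer>map("inputMap"))
 .drainTo(Sinks.map("outputMap"));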
@Nonnull
public static <T, K, V> StreamSource<T> mapJournal(
        @Nonnull String mapName,
        @Nonnull DistributedPredicate<? super EventJournalMapEvent<K, V>> predicateFn,
        @Nonnull DistributedFunction<? super EventJournalMapEvent<K, V>, ? extends T> projectionFn,
        @Nonnull JournalInitialPosition initialPos
) {
    return streamFromProcessorWithWatermarks("mapJournalSource(" + mapName + ')',
            w -> streamMapP(mapName, predicateFn, projectionFn, initialPos, w), false);
}
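// Usage sketch: assumes the event journal is enabled for "myMap";
// mapPutEvents() and mapEventToEntry() are the same helpers the processor
// supplier above uses as defaults.
p.drawFrom(Sources.mapJournal("myMap",
        mapPutEvents(),
        mapEventToEntry(),
        JournalInitialPosition.START_FROM_OLDEST));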
@Nonnull
public static <T, K, V> BatchSource<T> map(
        @Nonnull String mapName,
        @Nonnull Predicate<? super K, ? super V> predicate,
        @Nonnull Projection<? super Entry<K, V>, ? extends T> projection
) {
    return batchFromProcessor("mapSource(" + mapName + ')',
            readMapP(mapName, predicate, projection));
}
public static void main(String[] args) throws Exception {
    System.setProperty("hazelcast.logging.type", "log4j");

    // Start a two-member Jet cluster; we submit the job through the second member
    Jet.newJetInstance();
    JetInstance jet = Jet.newJetInstance();
    try {
        // Populate the source IMap
        IMapJet<Object, Object> map = jet.getMap("map");
        range(0, COUNT).parallel().forEach(i -> map.put("key-" + i, i));

        // Build and run the DAG: IMap reader -> file writer
        DAG dag = new DAG();
        Vertex source = dag.newVertex("map-source", SourceProcessors.readMapP(map.getName()));
        Vertex sink = dag.newVertex("file-sink", new WriteFilePSupplier(OUTPUT_FOLDER));
        dag.edge(between(source, sink));
        jet.newJob(dag).join();

        System.out.println("\nHazelcast IMap dumped to folder "
                + new File(OUTPUT_FOLDER).getAbsolutePath());
    } finally {
        Jet.shutdownAll();
    }
}
SourceProcessors.<Trade, Long, Trade>streamMapP(TRADES_MAP_NAME,
        DistributedPredicate.alwaysTrue(),
        EventJournalMapEvent::getNewValue,
        JournalInitialPosition.START_FROM_OLDEST,
        wmGenParams(
private void rewriteDagWithSnapshotRestore(DAG dag, long snapshotId, String mapName) {
    IMap<Object, Object> snapshotMap = nodeEngine.getHazelcastInstance().getMap(mapName);
    snapshotId = SnapshotValidator.validateSnapshot(snapshotId, jobIdString(), snapshotMap);
    logger.info("State of " + jobIdString() + " will be restored from snapshot "
            + snapshotId + ", map=" + mapName);

    List<Vertex> originalVertices = new ArrayList<>();
    dag.iterator().forEachRemaining(originalVertices::add);

    Map<String, Integer> vertexToOrdinal = new HashMap<>();
    // Prepend two vertices: one reads the snapshot IMap, the other explodes
    // each snapshot entry toward the user vertex it belongs to
    Vertex readSnapshotVertex = dag.newVertex(SNAPSHOT_VERTEX_PREFIX + "read", readMapP(mapName));
    long finalSnapshotId = snapshotId;
    Vertex explodeVertex = dag.newVertex(SNAPSHOT_VERTEX_PREFIX + "explode",
            () -> new ExplodeSnapshotP(vertexToOrdinal, finalSnapshotId));
    dag.edge(between(readSnapshotVertex, explodeVertex).isolated());

    int index = 0;
    // add the edges
    for (Vertex userVertex : originalVertices) {
        vertexToOrdinal.put(userVertex.getName(), index);
        int destOrdinal = dag.getInboundEdges(userVertex.getName()).size();
        dag.edge(new SnapshotRestoreEdge(explodeVertex, index, userVertex, destOrdinal));
        index++;
    }
}
public static CompletableFuture<Void> copyMapUsingJob(JetInstance instance, int queueSize,
                                                      String sourceMap, String targetMap) {
    DAG dag = new DAG();
    Vertex source = dag.newVertex("readMap(" + sourceMap + ')', readMapP(sourceMap));
    Vertex sink = dag.newVertex("writeMap(" + targetMap + ')', writeMapP(targetMap));
    dag.edge(between(source, sink).setConfig(new EdgeConfig().setQueueSize(queueSize)));
    JobConfig jobConfig = new JobConfig()
            .setName("copy-" + sourceMap + "-to-" + targetMap);
    return instance.newJob(dag, jobConfig).getFuture();
}
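// Usage sketch (queue size and map names are illustrative); join() blocks
// until the copy job completes:
copyMapUsingJob(jet, 1024, "source-map", "target-map").join();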
Vertex source = dag.newVertex("source", readMapP(DOCID_NAME));
Vertex docSource = dag.newVertex("doc-source", readMapP(DOCID_NAME));
Vertex readTickerInfoMap = dag.newVertex("readTickerInfoMap", readMapP(TICKER_INFO_MAP_NAME));
Vertex collectToMap = dag.newVertex("collectToMap",
        Processors.aggregateP(AggregateOperations.toMap(entryKey(), entryValue())));