public static Pipeline build(String bootstrapServers) {
    Properties properties = new Properties();
    properties.put(ConsumerConfig.GROUP_ID_CONFIG, UUID.randomUUID().toString());
    properties.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers);
    properties.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getCanonicalName());
    properties.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getCanonicalName());
    properties.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");

    Pipeline pipeline = Pipeline.create();
    pipeline
            .drawFrom(KafkaSources.kafka(properties, Constants.TOPIC_NAME_PRECIOUS))
            .drainTo(Sinks.map(Constants.IMAP_NAME_PRECIOUS));
    return pipeline;
}
/**
 * Creates and returns an executable job based on the supplied pipeline.
 * Jet will asynchronously start executing the job.
 */
@Nonnull
default Job newJob(@Nonnull Pipeline pipeline) {
    return newJob(pipeline.toDag());
}
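// A minimal usage sketch (not part of the original samples), assuming a Jet
// instance started with Jet.newJetInstance() and the Kafka build(...) factory
// above:
JetInstance jet = Jet.newJetInstance();
Job job = jet.newJob(build(bootstrapServers)); // execution starts asynchronously
job.join(); // blocks until the job fails or is cancelled (streaming jobs never complete on their own)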
/**
 * Takes the contents of the source map and writes them into the sink map.
 */
private static Pipeline mapSourceAndSink(String sourceMapName, String sinkMapName) {
    Pipeline pipeline = Pipeline.create();
    pipeline.drawFrom(Sources.map(sourceMapName))
            .drainTo(Sinks.map(sinkMapName));
    return pipeline;
}
/**
 * Creates and returns a Jet job based on the supplied pipeline and job
 * configuration. Jet will asynchronously start executing the job.
 *
 * <p>If the name in the JobConfig is non-null, Jet checks if there is an
 * active job with an equal name, in which case it throws {@link
 * JobAlreadyExistsException}. A job is active if it is running, suspended
 * or waiting to be run; that is, it has not completed or failed. Thus
 * there can be at most one active job with a given name at a time, and
 * you can reuse the job name after the previous job has completed.
 *
 * <p>See also {@link #newJobIfAbsent}.
 *
 * @throws JobAlreadyExistsException if there is an active job with
 *         an equal name
 */
@Nonnull
default Job newJob(@Nonnull Pipeline pipeline, @Nonnull JobConfig config) {
    return newJob(pipeline.toDag(), config);
}
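// A minimal usage sketch (jet and pipeline are assumed to exist): submitting
// a second job with the same non-null name while the first is still active
// throws JobAlreadyExistsException.
JobConfig config = new JobConfig().setName("precious-ingest");
jet.newJob(pipeline, config);
try {
    jet.newJob(pipeline, config);
} catch (JobAlreadyExistsException e) {
    // "precious-ingest" is already taken by an active job
}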
public static Pipeline build() {
    Pipeline p = Pipeline.create();

    // Palladium and Platinum only
    p.drawFrom(Sources.<String, Object>mapJournal(
            Constants.IMAP_NAME_PRECIOUS, JournalInitialPosition.START_FROM_OLDEST))
     .map(e -> e.getKey() + "==" + e.getValue())
     .filter(str -> str.toLowerCase().startsWith("p"))
     .drainTo(Sinks.logger());

    return p;
}
/**
 * Creates and returns a Jet job based on the supplied pipeline and job
 * configuration. Jet will asynchronously start executing the job.
 *
 * <p>If the name in the JobConfig is non-null, Jet checks if there is an
 * active job with an equal name. If there is, it will join that job instead
 * of submitting a new one. A job is active if it is running, suspended or
 * waiting to be run; that is, it has not completed or failed. In other
 * words, this method ensures that the job with this name is running and is
 * not running multiple times in parallel.
 *
 * <p>This method is useful for microservice deployments where each package
 * contains a Jet member and the job, and you want the job to run only once.
 * But if the job is a batch job that runs very quickly, it can execute
 * multiple times, because the job name can be reused after a previous
 * execution has completed.
 *
 * <p>If the job name is null, a new job is always submitted.
 *
 * <p>See also {@link #newJob}.
 */
@Nonnull
default Job newJobIfAbsent(@Nonnull Pipeline pipeline, @Nonnull JobConfig config) {
    return newJobIfAbsent(pipeline.toDag(), config);
}
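// A minimal usage sketch (jet and pipeline are assumed to exist): concurrent
// submissions of the same named job through newJobIfAbsent yield handles to a
// single shared execution.
JobConfig config = new JobConfig().setName("shared-job");
Job first = jet.newJobIfAbsent(pipeline, config);  // submits a new job
Job second = jet.newJobIfAbsent(pipeline, config); // joins the job submitted above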
private static Pipeline buildPipeline(String connectionUrl) {
    Pipeline p = Pipeline.create();
    p.drawFrom(Sources.jdbc(connectionUrl, "SELECT * FROM " + TABLE_NAME,
            resultSet -> new User(resultSet.getInt(1), resultSet.getString(2))))
     .map(user -> Util.entry(user.getId(), user))
     .drainTo(Sinks.map(MAP_NAME));
    return p;
}
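// The User class is not shown in these samples; a minimal sketch matching the
// (int id, String name) constructor and the getters used by the JDBC
// pipelines. Jet sends it across the network and stores it in an IMap, so it
// must be serializable.
public class User implements Serializable {
    private final int id;
    private final String name;

    public User(int id, String name) {
        this.id = id;
        this.name = name;
    }

    public int getId() {
        return id;
    }

    public String getName() {
        return name;
    }
}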
/**
 * Takes the contents of the source map, converts each value to a string,
 * and suffixes the value in the sink map with {@code -odd} if the key is
 * odd and with {@code -even} if the key is even.
 */
private static Pipeline mapWithUpdating(String sourceMapName, String sinkMapName) {
    Pipeline pipeline = Pipeline.create();
    pipeline.drawFrom(Sources.<Integer, Integer>map(sourceMapName))
            .map(e -> entry(e.getKey(), String.valueOf(e.getValue())))
            .drainTo(
                    Sinks.mapWithUpdating(
                            sinkMapName,
                            (oldValue, item) -> item.getKey() % 2 == 0
                                    ? oldValue + "-even"
                                    : oldValue + "-odd"
                    )
            );
    return pipeline;
}
private static Pipeline buildPipeline(String connectionUrl) {
    Pipeline p = Pipeline.create();
    p.drawFrom(Sources.<Integer, User>map(MAP_NAME))
     .map(Map.Entry::getValue)
     .drainTo(Sinks.jdbc("INSERT INTO " + TABLE_NAME + "(id, name) VALUES(?, ?)",
             connectionUrl,
             (stmt, user) -> {
                 // Bind the values from the stream item to a PreparedStatement
                 // created from the above query.
                 stmt.setInt(1, user.getId());
                 stmt.setString(2, user.getName());
             }));
    return p;
}
/**
 * Takes the contents of the source map, maps every entry to a key called
 * {@code sum}, and writes the entries into the sink map using a merge
 * function that adds the old value and the new value.
 */
private static Pipeline mapWithMerging(String sourceMapName, String sinkMapName) {
    Pipeline pipeline = Pipeline.create();
    pipeline.drawFrom(Sources.<Integer, Integer>map(sourceMapName))
            .map(e -> entry("sum", e.getValue()))
            .drainTo(
                    Sinks.mapWithMerging(
                            sinkMapName,
                            (oldValue, newValue) -> oldValue + newValue
                    )
            );
    return pipeline;
}
private static Pipeline buildPipeline() {
    Pattern delimiter = Pattern.compile("\\W+");
    Pipeline p = Pipeline.create();
    p.drawFrom(Sources.<Long, String>map(BOOK_LINES))
     .flatMap(e -> traverseArray(delimiter.split(e.getValue().toLowerCase())))
     .filter(word -> !word.isEmpty())
     .groupingKey(wholeItem())
     .aggregate(counting())
     .drainTo(Sinks.map(COUNTS));
    return p;
}
public static Pipeline buildPipeline(String sourceName, String sinkName) {
    Pattern pattern = Pattern.compile("\\W+");
    Pipeline pipeline = Pipeline.create();
    pipeline.drawFrom(Sources.<Integer, String>map(sourceName))
            .flatMap(e -> Traversers.traverseArray(pattern.split(e.getValue().toLowerCase()))
                                    .filter(w -> !w.isEmpty()))
            .groupingKey(wholeItem())
            .aggregate(counting())
            .drainTo(Sinks.map(sinkName));
    return pipeline;
}
private static Pipeline buildPipeline(JobConf jobConfig) {
    Pipeline p = Pipeline.create();
    p.drawFrom(HdfsSources.<AvroWrapper<User>, NullWritable>hdfs(jobConfig))
     // Keep only records whose Avro datum has the field at index 3 set to true
     .filter(entry -> entry.getKey().datum().get(3).equals(Boolean.TRUE))
     // Log each passing record using the datum's string representation
     .peek(entry -> entry.getKey().datum().toString())
     .drainTo(HdfsSinks.hdfs(jobConfig));
    return p;
}
/**
 * Takes the contents of the source map and applies an entry processor to
 * the sink map that increments the values by 5.
 */
private static Pipeline mapWithEntryProcessor(String sourceMapName, String sinkMapName) {
    Pipeline pipeline = Pipeline.create();
    pipeline.drawFrom(Sources.<Integer, Integer>map(sourceMapName))
            .drainTo(
                    Sinks.mapWithEntryProcessor(
                            sinkMapName,
                            entryKey(),
                            item -> new IncrementEntryProcessor(5)
                    )
            );
    return pipeline;
}
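// IncrementEntryProcessor is not shown in these samples; a minimal sketch,
// assuming Hazelcast's AbstractEntryProcessor, that adds a fixed increment to
// the entry's value, matching the new IncrementEntryProcessor(5) call above.
private static class IncrementEntryProcessor extends AbstractEntryProcessor<Integer, Integer> {
    private final int increment;

    IncrementEntryProcessor(int increment) {
        this.increment = increment;
    }

    @Override
    public Object process(Map.Entry<Integer, Integer> entry) {
        Integer value = entry.getValue();
        entry.setValue(value == null ? increment : value + increment);
        return null;
    }
}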
@RequestMapping("/submitJob") public void submitJob() { Pipeline pipeline = Pipeline.create(); pipeline.drawFrom(CustomSourceP.customSource()) .drainTo(Sinks.logger()); JobConfig jobConfig = new JobConfig() .addClass(SpringBootSample.class) .addClass(CustomSourceP.class); instance.newJob(pipeline, jobConfig).join(); }
private static Pipeline buildPipeline() {
    Pipeline p = Pipeline.create();
    p.drawFrom(Sources.<Trade, Integer, Trade>mapJournal(TRADES_MAP_NAME,
            DistributedPredicate.alwaysTrue(),
            EventJournalMapEvent::getNewValue,
            START_FROM_CURRENT))
     .groupingKey(Trade::getTicker)
     .rollingAggregate(summingLong(Trade::getPrice))
     .drainTo(Sinks.map(VOLUME_MAP_NAME));
    return p;
}