/**
 * Takes the contents of the source map, converts each value to a string and
 * suffixes the value with {@code odd} if the key is odd and with {@code even}
 * if the key is even.
 */
private static Pipeline mapWithUpdating(String sourceMapName, String sinkMapName) {
    Pipeline pipeline = Pipeline.create();
    pipeline.drawFrom(Sources.<Integer, Integer>map(sourceMapName))
            .map(e -> entry(e.getKey(), String.valueOf(e.getValue())))
            .drainTo(
                    Sinks.mapWithUpdating(
                            sinkMapName,
                            (oldValue, item) -> item.getKey() % 2 == 0
                                    ? oldValue + "-even"
                                    : oldValue + "-odd"
                    )
            );
    return pipeline;
}
private static Pipeline buildPipeline() {
    Pattern delimiter = Pattern.compile("\\W+");
    Pipeline p = Pipeline.create();
    p.drawFrom(Sources.<Long, String>map(BOOK_LINES))
     .flatMap(e -> traverseArray(delimiter.split(e.getValue().toLowerCase())))
     .filter(word -> !word.isEmpty())
     .groupingKey(wholeItem())
     .aggregate(counting())
     .drainTo(Sinks.map(COUNTS));
    return p;
}
public static Pipeline buildPipeline(String sourceName, String sinkName) {
    Pattern pattern = Pattern.compile("\\W+");
    Pipeline pipeline = Pipeline.create();
    pipeline.drawFrom(Sources.<Integer, String>map(sourceName))
            .flatMap(e -> Traversers.traverseArray(pattern.split(e.getValue().toLowerCase()))
                                    .filter(w -> !w.isEmpty()))
            .groupingKey(wholeItem())
            .aggregate(counting())
            .drainTo(Sinks.map(sinkName));
    return pipeline;
}
/**
 * Takes the contents of the source map and writes them into the sink map.
 */
private static Pipeline mapSourceAndSink(String sourceMapName, String sinkMapName) {
    Pipeline pipeline = Pipeline.create();
    pipeline.drawFrom(Sources.map(sourceMapName))
            .drainTo(Sinks.map(sinkMapName));
    return pipeline;
}
private static Pipeline buildPipeline(String connectionUrl) {
    Pipeline p = Pipeline.create();
    p.drawFrom(Sources.jdbc(connectionUrl, "SELECT * FROM " + TABLE_NAME,
            resultSet -> new User(resultSet.getInt(1), resultSet.getString(2))))
     .map(user -> Util.entry(user.getId(), user))
     .drainTo(Sinks.map(MAP_NAME));
    return p;
}
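// The JDBC snippets above and below rely on a User bean that isn't shown.
// A minimal sketch of what it could look like (field and accessor names are
// assumptions matching the calls in the pipelines, not the sample's actual class):
public class User implements Serializable {

    private int id;
    private String name;

    public User(int id, String name) {
        this.id = id;
        this.name = name;
    }

    public int getId() {
        return id;
    }

    public String getName() {
        return name;
    }
}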
private static Pipeline buildPipeline() {
    Pipeline p = Pipeline.create();
    p.drawFrom(Sources.files(getBooksPath()))
     .filter(line -> line.startsWith("The "))
     .drainTo(buildTopicSink());
    return p;
}
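// buildTopicSink() isn't shown above. A minimal sketch of one way to write it,
// publishing each item to a Hazelcast ITopic via Jet's SinkBuilder (the sink
// name and the topic name "lines" are assumptions, not from the sample):
private static Sink<String> buildTopicSink() {
    return SinkBuilder
            .sinkBuilder("topic-sink", ctx -> ctx.jetInstance().getHazelcastInstance().<String>getTopic("lines"))
            .<String>receiveFn((topic, item) -> topic.publish(item))
            .build();
}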
private static Pipeline buildPipeline(String connectionUrl) {
    Pipeline p = Pipeline.create();
    p.drawFrom(Sources.<Integer, User>map(MAP_NAME))
     .map(Map.Entry::getValue)
     .drainTo(Sinks.jdbc("INSERT INTO " + TABLE_NAME + "(id, name) VALUES(?, ?)",
             connectionUrl,
             (stmt, user) -> {
                 // Bind the values from the stream item to a PreparedStatement
                 // created from the above query.
                 stmt.setInt(1, user.getId());
                 stmt.setString(2, user.getName());
             }));
    return p;
}
private static Pipeline buildPipeline(JobConf jobConfig) {
    Pipeline p = Pipeline.create();
    p.drawFrom(HdfsSources.<AvroWrapper<User>, NullWritable>hdfs(jobConfig))
     .filter(entry -> entry.getKey().datum().get(3).equals(Boolean.TRUE))
     .peek(entry -> entry.getKey().datum().toString())
     .drainTo(HdfsSinks.hdfs(jobConfig));
    return p;
}
/**
 * Builds and returns the Pipeline which represents the actual computation.
 * To compute the probability of finding word B after A, one has to know
 * how many pairs contain word A as the first entry and how many of them
 * contain B as the second entry. The pipeline creates pairs from consecutive
 * words and computes the probabilities of A->B.
 */
private static Pipeline buildPipeline() {
    Pipeline p = Pipeline.create();
    // Reads files line-by-line
    BatchStage<String> lines = p.drawFrom(Sources.<String>files(INPUT_FILE));
    Pattern twoWords = Pattern.compile("(\\.|\\w+)\\s(\\.|\\w+)");
    // Calculates probabilities by flatmapping lines into two-word consecutive
    // pairs using regular expressions and aggregates them into an IMap.
    lines.flatMap(e -> traverseMatcher(twoWords.matcher(e.toLowerCase()), m -> tuple2(m.group(1), m.group(2))))
         .groupingKey(Tuple2::f0)
         .aggregate(buildAggregateOp())
         .drainTo(Sinks.map("stateTransitions"));
    return p;
}
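// The traverseMatcher() helper used above is not shown (nor is buildAggregateOp(),
// which is omitted here). A minimal sketch of traverseMatcher, assuming the
// semantics the pipeline needs: a Traverser that emits one mapped item per regex
// match and returns null once the matcher is exhausted.
private static <R> Traverser<R> traverseMatcher(Matcher matcher, Function<Matcher, R> mapFn) {
    return () -> matcher.find() ? mapFn.apply(matcher) : null;
}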
private static Pipeline buildPipeline() {
    Pipeline p = Pipeline.create();
    p.drawFrom(Sources.<String, User>map(MAP_NAME))
     .map(Map.Entry::getValue)
     .drainTo(AvroSinks.files(DIRECTORY_NAME, AvroSink::schemaForUser, User.class));
    return p;
}
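// AvroSink::schemaForUser is referenced above but not shown. One way to obtain
// an Avro schema for the User class is reflection; this is a sketch, not
// necessarily how the sample defines it:
static Schema schemaForUser() {
    // Derives the Avro schema from the User class via Avro's reflect API.
    return ReflectData.get().getSchema(User.class);
}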
public static void main(String[] args) {
    ApplicationContext context = new AnnotationConfigApplicationContext(AppConfig.class);

    JetInstance jet = context.getBean(JetInstance.class);

    Pipeline pipeline = Pipeline.create();
    pipeline.drawFrom(CustomSourceP.customSource())
            .drainTo(Sinks.logger());

    JobConfig jobConfig = new JobConfig()
            .addClass(AnnotationBasedConfigurationSample.class)
            .addClass(CustomSourceP.class);

    jet.newJob(pipeline, jobConfig).join();

    jet.shutdown();
}
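// CustomSourceP is referenced here and in the Spring samples below but not
// shown. A minimal sketch (the emitted items and source name are assumptions):
// a batch source processor that emits a few items from complete(), plus a
// factory method that wraps it in a BatchSource usable with drawFrom().
public final class CustomSourceP extends AbstractProcessor {

    private final Traverser<String> traverser = Traversers.traverseItems("foo", "bar", "baz");

    @Override
    public boolean complete() {
        // Emits cooperatively; returns true once the traverser is drained.
        return emitFromTraverser(traverser);
    }

    public static BatchSource<String> customSource() {
        return Sources.batchFromProcessor("custom-source",
                ProcessorMetaSupplier.of(CustomSourceP::new));
    }
}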
/**
 * Takes the contents of the source map, maps every entry to an entry with the
 * key {@code sum}, and writes it into the sink map using a merge function
 * that adds the old and new values together.
 */
private static Pipeline mapWithMerging(String sourceMapName, String sinkMapName) {
    Pipeline pipeline = Pipeline.create();
    pipeline.drawFrom(Sources.<Integer, Integer>map(sourceMapName))
            .map(e -> entry("sum", e.getValue()))
            .drainTo(
                    Sinks.mapWithMerging(
                            sinkMapName,
                            (oldValue, newValue) -> oldValue + newValue
                    )
            );
    return pipeline;
}
@RequestMapping("/submitJob") public void submitJob() { Pipeline pipeline = Pipeline.create(); pipeline.drawFrom(CustomSourceP.customSource()) .drainTo(Sinks.logger()); JobConfig jobConfig = new JobConfig() .addClass(SpringBootSample.class) .addClass(CustomSourceP.class); instance.newJob(pipeline, jobConfig).join(); }
/**
 * Takes the contents of the source map and, for each key, applies an entry
 * processor to the sink map that increments the value by 5.
 */
private static Pipeline mapWithEntryProcessor(String sourceMapName, String sinkMapName) {
    Pipeline pipeline = Pipeline.create();
    pipeline.drawFrom(Sources.<Integer, Integer>map(sourceMapName))
            .drainTo(
                    Sinks.mapWithEntryProcessor(
                            sinkMapName,
                            entryKey(),
                            item -> new IncrementEntryProcessor(5)
                    )
            );
    return pipeline;
}
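// IncrementEntryProcessor is not shown in the snippet. A minimal sketch,
// assuming Hazelcast IMDG 3.x's AbstractEntryProcessor base class:
private static class IncrementEntryProcessor extends AbstractEntryProcessor<Integer, Integer> {

    private final int incrementBy;

    IncrementEntryProcessor(int incrementBy) {
        this.incrementBy = incrementBy;
    }

    @Override
    public Object process(Map.Entry<Integer, Integer> entry) {
        // setValue() updates the map entry and returns the previous value.
        return entry.setValue(entry.getValue() + incrementBy);
    }
}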
/**
 * Helper method to construct the pipeline for the job
 *
 * @return the pipeline for the job
 */
public static Pipeline buildPipeline() {
    final Pipeline p = Pipeline.create();

    // Compute map server side
    final BatchStage<Horse> c = p.drawFrom(Sources.map(EVENTS_BY_NAME, t -> true, HORSE_FROM_EVENT));

    final BatchStage<Entry<Horse, Long>> c2 = c.groupingKey(wholeItem())
                                               .aggregate(counting())
                                               .filter(ent -> ent.getValue() > 1);
    c2.drainTo(Sinks.map(MULTIPLE));

    return p;
}
public static void main(String[] args) { ApplicationContext context = new GenericXmlApplicationContext("application-context-with-schema.xml"); JetInstance jetInstance = (JetInstance) context.getBean("instance"); JetInstance jetClient = (JetInstance) context.getBean("client"); Pipeline pipeline = Pipeline.create(); pipeline.drawFrom(CustomSourceP.customSource()) .drainTo(Sinks.logger()); JobConfig jobConfig = new JobConfig() .addClass(XmlConfigurationWithSchemaSample.class) .addClass(CustomSourceP.class); jetClient.newJob(pipeline, jobConfig).join(); jetClient.shutdown(); jetInstance.shutdown(); } }
public static void main(String[] args) { ApplicationContext context = new GenericXmlApplicationContext("application-context.xml"); JetInstance jetInstance = (JetInstance) context.getBean("instance"); JetInstance jetClient = (JetInstance) context.getBean("client"); Pipeline pipeline = Pipeline.create(); pipeline.drawFrom(CustomSourceP.customSource()) .drainTo(Sinks.logger()); JobConfig jobConfig = new JobConfig() .addClass(XmlConfigurationSample.class) .addClass(CustomSourceP.class); jetClient.newJob(pipeline, jobConfig).join(); jetClient.shutdown(); jetInstance.shutdown(); } }
private static Pipeline buildPipeline() {
    Pipeline p = Pipeline.create();
    p.drawFrom(AvroSources.filesBuilder(AvroSink.DIRECTORY_NAME, ReflectDatumReader<User>::new)
                          // Both Jet members share the same local file system
                          .sharedFileSystem(true)
                          .build())
     .map(user -> Util.entry(user.getUsername(), user))
     .drainTo(Sinks.map(AvroSink.MAP_NAME));
    return p;
}
private static Pipeline buildPipeline(String sourceDir, String targetDir) {
    Pipeline p = Pipeline.create();
    p.drawFrom(Sources.files(sourceDir))
     .map(LogLine::parse)
     .filter((LogLine log) -> log.getResponseCode() >= 200 && log.getResponseCode() < 400)
     .flatMap(AccessLogAnalyzer::explodeSubPaths)
     .groupingKey(wholeItem())
     .aggregate(counting())
     .drainTo(Sinks.files(targetDir));
    return p;
}
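// explodeSubPaths() is a helper not shown above. A sketch of the idea, assuming
// LogLine exposes the request path via a getEndpoint() accessor: it emits every
// leading sub-path, e.g. "/a/b/c" -> "/a", "/a/b", "/a/b/c".
private static Traverser<String> explodeSubPaths(LogLine logLine) {
    String endpoint = logLine.getEndpoint();
    List<String> subPaths = new ArrayList<>();
    StringBuilder path = new StringBuilder();
    for (String part : endpoint.split("/")) {
        if (part.isEmpty()) {
            continue;
        }
        path.append('/').append(part);
        subPaths.add(path.toString());
    }
    return Traversers.traverseIterable(subPaths);
}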
@Test(timeout = 20000)
public void test() {
    Pipeline p = Pipeline.create();
    p.drawFrom(Sources.batchFromProcessor("source", preferLocalParallelismOne(CustomSourceP::new)))
     .drainTo(Sinks.fromProcessor("sink", preferLocalParallelismOne(CustomSinkP::new)));

    jetInstance.newJob(p).join();
}
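// CustomSinkP, the counterpart sink processor in this test, isn't shown either.
// A minimal sketch that simply logs every received item:
static final class CustomSinkP extends AbstractProcessor {

    @Override
    protected boolean tryProcess(int ordinal, Object item) {
        // Returning true signals that the item has been consumed.
        getLogger().info("Received: " + item);
        return true;
    }
}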