/**
 * Builds the flow through {@code createFlow()} and then applies every optional
 * setting that has been configured on this object.
 *
 * <p>Settings that are {@code null} (strategies, listeners, priority) or that
 * contain no text (DOT output paths) are skipped.
 *
 * @throws Exception whatever {@code createFlow()} or the flow configuration calls raise
 */
@Override
public void afterPropertiesSet() throws Exception {
    flow = createFlow();

    // Optional strategies.
    if (skipStrategy != null) {
        flow.setFlowSkipStrategy(skipStrategy);
    }
    if (stepStrategy != null) {
        flow.setFlowStepStrategy(stepStrategy);
    }

    // Optional listeners, registered in iteration order.
    if (listeners != null) {
        for (FlowListener registered : listeners) {
            flow.addListener(registered);
        }
    }

    // Optional DOT exports; only attempted when a non-blank target was supplied.
    if (StringUtils.hasText(writeDOT)) {
        flow.writeDOT(writeDOT);
    }
    if (StringUtils.hasText(writeStepsDOT)) {
        flow.writeStepsDOT(writeStepsDOT);
    }

    // Optional submit priority.
    if (priority != null) {
        flow.setSubmitPriority(priority);
    }
}
public static void main(String[] args) throws IOException { if (args.length != 1) { System.out.println("Usage: hadoop jar cascading_ext.job.jar com.liveramp.cascading_ext.example.BloomJoinExampleWithoutCascadingUtil <output dir>"); return; } String outputDir = args[0]; Hfs sink = new Hfs(new SequenceFile(new Fields("field1", "field2", "field3", "field4")), outputDir); Pipe source1 = new Pipe("source1"); Pipe source2 = new Pipe("source2"); Pipe joined = new BloomJoin(source1, new Fields("field1"), source2, new Fields("field3")); Map<String, Tap> sources = new HashMap<String, Tap>(); sources.put("source1", ExampleFixtures.SOURCE_TAP_1); sources.put("source2", ExampleFixtures.SOURCE_TAP_2); // set some default properties and set the flow step strategy Flow f = new HadoopFlowConnector(BloomProps.getDefaultProperties()).connect("Example BloomJoin", sources, sink, joined); f.setFlowStepStrategy(new BloomAssemblyStrategy()); f.complete(); // Take a look at the output tuples TupleEntryIterator output = sink.openForRead(CascadingUtil.get().getFlowProcess()); System.out.println("Output tuples from flow:"); while (output.hasNext()) { System.out.println(output.next().getTuple()); } }
/**
 * Checks that a {@link FlowStepStrategy} set on a flow is applied when the flow runs.
 * <p/>
 * The assembly parses the leading run of non-space characters of each "line" into "ip",
 * groups on "ip" and counts each group. The strategy records that it was called and asserts
 * that the predecessor step list it receives is empty. After the flow completes, the result
 * is checked with {@code validateLength( flow, 8, null )}.
 */
@Test
public void testFlowStepStrategy() throws Exception
  {
  getPlatform().copyFromLocal( inputFileApache );

  Tap apacheLog = getPlatform().getTextFile( new Fields( "offset", "line" ), inputFileApache );

  // parse -> group -> count, each stage held in its own variable
  Pipe parsed = new Each( new Pipe( "test" ), new Fields( "line" ), new RegexParser( new Fields( "ip" ), "^[^ ]*" ), new Fields( "ip" ) );
  Pipe grouped = new GroupBy( parsed, new Fields( "ip" ) );
  Pipe counted = new Every( grouped, new Count(), new Fields( "ip", "count" ) );

  Tap results = getPlatform().getTextFile( getOutputPath( "simple" ), SinkMode.REPLACE );

  Flow countFlow = getPlatform().getFlowConnector().connect( apacheLog, results, counted );

  // single-element array so the anonymous class can flip the flag
  final boolean[] strategyInvoked = {false};

  FlowStepStrategy recordingStrategy = new FlowStepStrategy()
  {
  @Override
  public void apply( Flow flow, List predecessorSteps, FlowStep flowStep )
    {
    strategyInvoked[ 0 ] = true;
    assertTrue( predecessorSteps.isEmpty() );
    }
  };

  countFlow.setFlowStepStrategy( recordingStrategy );

  countFlow.complete();

  assertTrue( strategyInvoked[ 0 ] );

  validateLength( countFlow, 8, null );
  }
/**
 * Checks that a {@link FlowStepStrategy} set on a flow is applied when the flow runs.
 * <p/>
 * The assembly parses the leading run of non-space characters of each "line" into "ip",
 * groups on "ip" and counts each group. The strategy records that it was called and asserts
 * that the predecessor step list it receives is empty. After the flow completes, the result
 * is checked with {@code validateLength( flow, 8, null )}.
 */
@Test
public void testFlowStepStrategy() throws Exception
  {
  getPlatform().copyFromLocal( inputFileApache );

  Tap apacheLog = getPlatform().getTextFile( new Fields( "offset", "line" ), inputFileApache );

  // parse -> group -> count, each stage held in its own variable
  Pipe parsed = new Each( new Pipe( "test" ), new Fields( "line" ), new RegexParser( new Fields( "ip" ), "^[^ ]*" ), new Fields( "ip" ) );
  Pipe grouped = new GroupBy( parsed, new Fields( "ip" ) );
  Pipe counted = new Every( grouped, new Count(), new Fields( "ip", "count" ) );

  Tap results = getPlatform().getTextFile( getOutputPath( "simple" ), SinkMode.REPLACE );

  Flow countFlow = getPlatform().getFlowConnector().connect( apacheLog, results, counted );

  // single-element array so the anonymous class can flip the flag
  final boolean[] strategyInvoked = {false};

  FlowStepStrategy recordingStrategy = new FlowStepStrategy()
  {
  @Override
  public void apply( Flow flow, List predecessorSteps, FlowStep flowStep )
    {
    strategyInvoked[ 0 ] = true;
    assertTrue( predecessorSteps.isEmpty() );
    }
  };

  countFlow.setFlowStepStrategy( recordingStrategy );

  countFlow.complete();

  assertTrue( strategyInvoked[ 0 ] );

  validateLength( countFlow, 8, null );
  }