/**
 * Hands the configuration of the given flow to {@code CascadingUtils.addSerializationToken}.
 *
 * @param flow the flow whose {@code JobConf} is passed on
 */
@Override
public void flowConfInit(Flow<JobConf> flow) {
  final JobConf flowConf = flow.getConfig();
  CascadingUtils.addSerializationToken(flowConf);
}
/**
 * Returns the full identifier the tap reports when given the flow's configuration.
 *
 * @param flow flow whose configuration is handed to the tap
 * @param tap  tap to identify
 * @return the value of {@code tap.getFullIdentifier( flow.getConfig() )}
 */
protected String getVertex( Flow flow, Tap tap )
  {
  String identifier = tap.getFullIdentifier( flow.getConfig() );

  return identifier;
  }
} // closes the enclosing type, whose declaration lies outside this excerpt
/**
 * Passes the {@code JobConf} of the supplied flow to {@code CascadingUtils.addSerializationToken}.
 *
 * @param flow the flow being initialized; its configuration is read via {@code getConfig()}
 */
@Override
public void flowConfInit(Flow<JobConf> flow) {
  JobConf jobConf = flow.getConfig();
  CascadingUtils.addSerializationToken(jobConf);
}
/**
 * Method addFlow registers a {@link cascading.flow.Flow} instance that is meant to take part in a {@link Cascade}.
 * <p>
 * A {@code null} flow is ignored. A flow is rejected when its name is already registered, or when the full
 * identifier of one of its sinks equals the full identifier of a sink of an already registered flow.
 *
 * @param flow the Flow to add, may be {@code null}
 * @return this CascadeDef instance
 * @throws CascadeException if the flow name or one of its sink identifiers is already present
 */
public CascadeDef addFlow( Flow flow )
  {
  if( flow == null )
    {
    return this;
    }

  if( flows.containsKey( flow.getName() ) )
    {
    throw new CascadeException( "all flow names must be unique, found duplicate: " + flow.getName() );
    }

  // the typed locals are required: Flow is used as a raw type here
  Collection<Tap> candidateSinks = flow.getSinksCollection();

  for( Tap candidateSink : candidateSinks )
    {
    String candidateIdentifier = candidateSink.getFullIdentifier( flow.getConfig() );

    for( Flow registeredFlow : flows.values() )
      {
      Collection<Tap> registeredSinks = registeredFlow.getSinksCollection();

      for( Tap registeredSink : registeredSinks )
        {
        String registeredIdentifier = registeredSink.getFullIdentifier( registeredFlow.getConfig() );

        if( !candidateIdentifier.equals( registeredIdentifier ) )
          {
          continue;
          }

        String message = "the flow: " + flow.getName() + ", has a sink identifier: " + candidateIdentifier + ", in common with the flow: " + registeredFlow.getName();

        throw new CascadeException( message );
        }
      }
    }

  flows.put( flow.getName(), flow );

  return this;
  }
// Calls setupJob, commitJob and cleanupJob on DomainBuilderOutputCommitter with the domain name from
// `properties` and the flow's config, then returns the flow.
// NOTE(review): cleanupJob is invoked twice in a row with identical arguments — confirm whether the second call is intentional.
DomainBuilderOutputCommitter.setupJob(properties.getDomainName(), flow.getConfig()); DomainBuilderOutputCommitter.commitJob(properties.getDomainName(), flow.getConfig()); DomainBuilderOutputCommitter.cleanupJob(properties.getDomainName(), flow.getConfig()); DomainBuilderOutputCommitter.cleanupJob(properties.getDomainName(), flow.getConfig()); return flow;
// Calls setupJob, commitJob and cleanupJob on DomainBuilderOutputCommitter with the domain name from
// `domainBuilder.properties` and the flow's config.
// NOTE(review): cleanupJob is invoked twice in a row with identical arguments — confirm whether the second call is intentional.
DomainBuilderOutputCommitter.setupJob(domainBuilder.properties.getDomainName(), flow.getConfig()); DomainBuilderOutputCommitter.commitJob(domainBuilder.properties.getDomainName(), flow.getConfig()); DomainBuilderOutputCommitter.cleanupJob(domainBuilder.properties.getDomainName(), flow.getConfig()); DomainBuilderOutputCommitter.cleanupJob(domainBuilder.properties.getDomainName(), flow.getConfig());
// Increments `count` when `sink` is not the very object returned by flow.getSink() (reference comparison, not equals)
// and the sink reports that its resource exists for the flow's config.
if( flow.getSink() != sink && sink.resourceExists( flow.getConfig() ) ) count++;
// Bumps `count` only if `sink` is a different object than flow.getSink() (identity check via !=)
// and sink.resourceExists( ... ) returns true for the flow's config.
if( flow.getSink() != sink && sink.resourceExists( flow.getConfig() ) ) count++;
/**
 * Chains four flows into a cascade whose skip strategy answers {@code true} for every flow, runs it, and then
 * checks that the listener attached to the second flow counted exactly one skip and that the sink of the
 * fourth flow does not exist.
 */
@Test
public void testSkippedCascade() throws IOException
  {
  getPlatform().copyFromLocal( inputFileIps );

  String root = "skipped";

  // each flow reads from the sink of the previous one
  Flow first = firstFlow( root + "/first", false );
  Flow second = secondFlow( first.getSink(), root + "/second" );
  Flow third = thirdFlow( second.getSink(), root + "/third" );
  Flow fourth = fourthFlow( third.getSink(), root + "/fourth" );

  CountingFlowListener skipCounter = new CountingFlowListener();
  second.addListener( skipCounter );

  CascadeConnector connector = new CascadeConnector( getProperties() );
  Cascade cascade = connector.connect( first, second, third, fourth );

  // strategy that asks for every flow to be skipped
  FlowSkipStrategy skipEverything = new FlowSkipStrategy()
    {
    public boolean skipFlow( Flow flow ) throws IOException
      {
      return true;
      }
    };

  cascade.setFlowSkipStrategy( skipEverything );

  cascade.start();
  cascade.complete();

  assertEquals( 1, skipCounter.skipped );

  boolean fourthSinkExists = fourth.getSink().resourceExists( fourth.getConfig() );
  assertFalse( "file exists", fourthSinkExists );
  }
/**
 * Builds a cascade of four dependent flows, installs a skip strategy that always returns {@code true}, and
 * verifies after completion that the second flow's listener recorded one skip and that no resource exists
 * at the fourth flow's sink.
 */
@Test
public void testSkippedCascade() throws IOException
  {
  getPlatform().copyFromLocal( inputFileIps );

  String outputRoot = "skipped";

  Flow flowOne = firstFlow( outputRoot + "/first", false );
  Flow flowTwo = secondFlow( flowOne.getSink(), outputRoot + "/second" );
  Flow flowThree = thirdFlow( flowTwo.getSink(), outputRoot + "/third" );
  Flow flowFour = fourthFlow( flowThree.getSink(), outputRoot + "/fourth" );

  CountingFlowListener listener = new CountingFlowListener();
  flowTwo.addListener( listener );

  Cascade cascade = new CascadeConnector( getProperties() ).connect( flowOne, flowTwo, flowThree, flowFour );

  // every flow is reported as skippable
  FlowSkipStrategy alwaysSkip = new FlowSkipStrategy()
    {
    public boolean skipFlow( Flow flow ) throws IOException
      {
      return true;
      }
    };
  cascade.setFlowSkipStrategy( alwaysSkip );

  cascade.start();
  cascade.complete();

  assertEquals( 1, listener.skipped );
  assertFalse( "file exists", flowFour.getSink().resourceExists( flowFour.getConfig() ) );
  }
@Test public void testSkipStrategiesKeep() throws Exception { getPlatform().copyFromLocal( inputFileApache ); Tap source = getPlatform().getTextFile( inputFileApache ); // !!! enable replace Tap sink = getPlatform().getTextFile( getOutputPath( "keep" ), SinkMode.KEEP ); Pipe pipe = new Pipe( "test" ); pipe = new Each( pipe, new Fields( "line" ), new RegexParser( new Fields( "ip" ), "^[^ ]*" ), new Fields( "ip" ) ); Flow flow = getPlatform().getFlowConnector().connect( source, sink, pipe ); sink.deleteResource( flow.getConfig() ); assertTrue( "default skip", !flow.getFlowSkipStrategy().skipFlow( flow ) ); assertTrue( "exist skip", !new FlowSkipIfSinkExists().skipFlow( flow ) ); flow.complete(); assertTrue( "default skip", flow.getFlowSkipStrategy().skipFlow( flow ) ); assertTrue( "exist skip", new FlowSkipIfSinkExists().skipFlow( flow ) ); validateLength( flow.openSource(), 10 ); // validate source, this once, as a sanity check validateLength( flow, 10, null ); }
@Test public void testSkipStrategiesKeep() throws Exception { getPlatform().copyFromLocal( inputFileApache ); Tap source = getPlatform().getTextFile( inputFileApache ); // !!! enable replace Tap sink = getPlatform().getTextFile( getOutputPath( "keep" ), SinkMode.KEEP ); Pipe pipe = new Pipe( "test" ); pipe = new Each( pipe, new Fields( "line" ), new RegexParser( new Fields( "ip" ), "^[^ ]*" ), new Fields( "ip" ) ); Flow flow = getPlatform().getFlowConnector().connect( source, sink, pipe ); sink.deleteResource( flow.getConfig() ); assertTrue( "default skip", !flow.getFlowSkipStrategy().skipFlow( flow ) ); assertTrue( "exist skip", !new FlowSkipIfSinkExists().skipFlow( flow ) ); flow.complete(); assertTrue( "default skip", flow.getFlowSkipStrategy().skipFlow( flow ) ); assertTrue( "exist skip", new FlowSkipIfSinkExists().skipFlow( flow ) ); validateLength( flow.openSource(), 10 ); // validate source, this once, as a sanity check validateLength( flow, 10, null ); }
@Test public void testSkipStrategiesReplace() throws Exception { getPlatform().copyFromLocal( inputFileApache ); Tap source = getPlatform().getTextFile( inputFileApache ); // !!! enable replace Tap sink = getPlatform().getTextFile( getOutputPath( "replace" ), SinkMode.REPLACE ); Pipe pipe = new Pipe( "test" ); pipe = new Each( pipe, new Fields( "line" ), new RegexParser( new Fields( "ip" ), "^[^ ]*" ), new Fields( "ip" ) ); Flow flow = getPlatform().getFlowConnector().connect( source, sink, pipe ); sink.deleteResource( flow.getConfig() ); assertTrue( "default skip", !flow.getFlowSkipStrategy().skipFlow( flow ) ); assertTrue( "exist skip", !new FlowSkipIfSinkExists().skipFlow( flow ) ); flow.complete(); assertTrue( "default skip", !flow.getFlowSkipStrategy().skipFlow( flow ) ); assertTrue( "exist skip", !new FlowSkipIfSinkExists().skipFlow( flow ) ); FlowSkipStrategy old = flow.getFlowSkipStrategy(); FlowSkipStrategy replaced = flow.setFlowSkipStrategy( new FlowSkipIfSinkExists() ); assertTrue( "not same instance", old == replaced ); validateLength( flow.openSource(), 10 ); // validate source, this once, as a sanity check validateLength( flow, 10, null ); }
@Test public void testSkipStrategiesReplace() throws Exception { getPlatform().copyFromLocal( inputFileApache ); Tap source = getPlatform().getTextFile( inputFileApache ); // !!! enable replace Tap sink = getPlatform().getTextFile( getOutputPath( "replace" ), SinkMode.REPLACE ); Pipe pipe = new Pipe( "test" ); pipe = new Each( pipe, new Fields( "line" ), new RegexParser( new Fields( "ip" ), "^[^ ]*" ), new Fields( "ip" ) ); Flow flow = getPlatform().getFlowConnector().connect( source, sink, pipe ); sink.deleteResource( flow.getConfig() ); assertTrue( "default skip", !flow.getFlowSkipStrategy().skipFlow( flow ) ); assertTrue( "exist skip", !new FlowSkipIfSinkExists().skipFlow( flow ) ); flow.complete(); assertTrue( "default skip", !flow.getFlowSkipStrategy().skipFlow( flow ) ); assertTrue( "exist skip", !new FlowSkipIfSinkExists().skipFlow( flow ) ); FlowSkipStrategy old = flow.getFlowSkipStrategy(); FlowSkipStrategy replaced = flow.setFlowSkipStrategy( new FlowSkipIfSinkExists() ); assertTrue( "not same instance", old == replaced ); validateLength( flow.openSource(), 10 ); // validate source, this once, as a sanity check validateLength( flow, 10, null ); }