/**
 * Stores the given value under the given key using the {@link Mode#REPLACE} mode.
 * <p>
 * This is a convenience overload that forwards to the mode-taking
 * {@code setProperty( Mode, String, String )} variant.
 *
 * @param key   the key
 * @param value the value
 * @return the current ConfigDef instance
 */
public ConfigDef setProperty( String key, String value )
  {
  final Mode replaceMode = Mode.REPLACE;

  return setProperty( replaceMode, key, value );
  }
/**
 * Copies every string-valued entry of the {@link Properties} returned by
 * {@code buildProperties()} into the given ConfigDef, applying the supplied mode
 * to each entry.
 *
 * @param configDef the ConfigDef that receives the properties
 * @param mode      the mode passed along with every key/value pair
 * @return the {@code configDef} argument, to allow call chaining
 */
public ConfigDef setProperties( ConfigDef configDef, ConfigDef.Mode mode )
  {
  Properties built = buildProperties();

  for( String key : built.stringPropertyNames() )
    {
    String value = built.getProperty( key );

    configDef.setProperty( mode, key, value );
    }

  return configDef;
  }
private Pipe getBloomFilterPipe(Pipe largePipe, Fields largeJoinFields, Pipe smallPipe, Fields smallJoinFields) throws IOException { String bloomJobID = UUID.randomUUID().toString(); Path bloomTempDir = FileSystemHelper.getRandomTemporaryPath("/tmp/bloom_tmp/"); String bloomPartsDir = bloomTempDir + "/parts"; String bloomFinalFilter = bloomTempDir + "/filter.bloomfilter"; String approxCountPartsDir = bloomTempDir + "/approx_distinct_keys_parts/"; Pipe filterPipe; smallPipe = new Each(smallPipe, smallJoinFields, new GetSerializedTuple()); smallPipe = new CreateBloomFilter(smallPipe, bloomJobID, approxCountPartsDir, bloomPartsDir, "serialized-tuple-key"); // This is a bit of a hack to: // 1) Force a dependency on the operations performed on RHS above (can't continue until they're done) // 2) Bind RHS to the flow, which wouldn't happen otherwise. // Note that RHS has no output, so there shouldn't be any danger in doing this. filterPipe = new NaiveMerge(largePipe.getName(), largePipe, smallPipe); // Load the bloom filter into memory and apply it to the LHS. filterPipe = new Each(filterPipe, largeJoinFields, new BloomJoinFilter(bloomJobID, false)); ConfigDef config = filterPipe.getStepConfigDef(); // tell BloomAssemblyStrategy which bloom filter to expect config.setProperty(BloomProps.SOURCE_BLOOM_FILTER_ID, bloomJobID); config.setProperty(BloomProps.REQUIRED_BLOOM_FILTER_PATH, bloomFinalFilter); return filterPipe; }
/**
 * Sub-assembly that turns a stream of serialized keys into bloom filter parts.
 * <p>
 * The stages are: compute indices per key with {@code GetIndices}, drop partial
 * duplicates of (split, index, hash_num), group by {@code split}, then aggregate
 * each group with {@code CreateBloomFilterFromIndices}. The parts directory, the
 * key-counts directory and the target filter id are recorded on the step ConfigDef
 * of the final stage, which becomes the tail of this assembly.
 *
 * @param keys                the incoming pipe carrying the keys
 * @param bloomFilterID       stored under {@code BloomProps.TARGET_BLOOM_FILTER_ID}
 * @param approxCountPartsDir stored under {@code BloomProps.BLOOM_KEYS_COUNTS_DIR}
 * @param bloomPartsDir       stored under {@code BloomProps.BLOOM_FILTER_PARTS_DIR}
 * @param keyBytesField       name of the field in {@code keys} handed to {@code GetIndices}
 * @param hashFactory         hash function factory passed to {@code GetIndices}
 * @throws IOException declared by the constructor signature
 */
public CreateBloomFilter(Pipe keys, String bloomFilterID, String approxCountPartsDir, String bloomPartsDir, String keyBytesField, HashFunctionFactory hashFactory) throws IOException {
  super(keys);

  Pipe indices = new Each(keys, new Fields(keyBytesField), new GetIndices(hashFactory), new Fields("split", "index", "hash_num"));
  Pipe deduped = new Each(indices, new Fields("split", "index", "hash_num"), new Unique.FilterPartialDuplicates());
  Pipe bySplit = new GroupBy(deduped, new Fields("split"));
  Pipe filterParts = new Every(bySplit, new Fields("index", "hash_num"), new CreateBloomFilterFromIndices(), Fields.ALL);

  ConfigDef stepConfig = filterParts.getStepConfigDef();
  stepConfig.setProperty(BloomProps.BLOOM_FILTER_PARTS_DIR, bloomPartsDir);
  stepConfig.setProperty(BloomProps.BLOOM_KEYS_COUNTS_DIR, approxCountPartsDir);
  stepConfig.setProperty(BloomProps.TARGET_BLOOM_FILTER_ID, bloomFilterID);

  setTails(filterParts);
}
private Tap makeTempTap( FlowElementGraph graph, Pipe pipe, String defaultDecoratorClassName ) { Tap checkpointTap = graph.getCheckpointsMap().get( pipe.getName() ); if( checkpointTap != null ) { LOG.info( "found checkpoint: {}, using tap: {}", pipe.getName(), checkpointTap ); checkpointTap = decorateTap( pipe, checkpointTap, FlowConnectorProps.CHECKPOINT_TAP_DECORATOR_CLASS, null ); } if( checkpointTap == null ) { // only restart from a checkpoint pipe or checkpoint tap below if( pipe instanceof Checkpoint ) { checkpointTap = makeTempTap( checkpointTapRootPath, pipe.getName() ); checkpointTap = decorateTap( pipe, checkpointTap, FlowConnectorProps.CHECKPOINT_TAP_DECORATOR_CLASS, null ); // mark as an anonymous checkpoint checkpointTap.getConfigDef().setProperty( ConfigDef.Mode.DEFAULT, "cascading.checkpoint", "true" ); } else { checkpointTap = makeTempTap( pipe.getName() ); } } return decorateTap( pipe, checkpointTap, FlowConnectorProps.TEMPORARY_TAP_DECORATOR_CLASS, defaultDecoratorClassName ); }
@Test public void testSubAssemblyConfigDef() throws IOException { getPlatform().copyFromLocal( inputFileNums20 ); Tap source = getPlatform().getTextFile( new Fields( "line" ), inputFileNums20 ); Pipe pipe = new Pipe( "test" ); pipe = new ConfigSubAssembly( pipe, getPlatform().isDAG() ); pipe.getConfigDef().setProperty( Mode.DEFAULT, "default", "pipe-default" ); // steps on above value pipe.getStepConfigDef().setProperty( Mode.DEFAULT, "default", "process-default" ); pipe.getConfigDef().setProperty( Mode.DEFAULT, "replace", "pipe-default" ); pipe.getConfigDef().setProperty( Mode.REPLACE, "replace", "pipe-replace" ); pipe.getNodeConfigDef().setProperty( Mode.REPLACE, "default-node", "node-replace" ); pipe.getStepConfigDef().setProperty( Mode.DEFAULT, "replace", "process-default" ); pipe.getStepConfigDef().setProperty( Mode.REPLACE, "replace", "process-replace" ); pipe.getStepConfigDef().setProperty( Mode.DEFAULT, "default-node", "process-default" ); Tap sink = getPlatform().getTextFile( getOutputPath( "subassembly-configdef" ), SinkMode.REPLACE ); Flow flow = getPlatform().getFlowConnector().connect( source, sink, pipe ); flow.complete(); assertTrue( flow.resourceExists( sink ) ); } }
@Test public void testSubAssemblyConfigDef() throws IOException { getPlatform().copyFromLocal( inputFileNums20 ); Tap source = getPlatform().getTextFile( new Fields( "line" ), inputFileNums20 ); Pipe pipe = new Pipe( "test" ); pipe = new ConfigSubAssembly( pipe, getPlatform().isDAG() ); pipe.getConfigDef().setProperty( Mode.DEFAULT, "default", "pipe-default" ); // steps on above value pipe.getStepConfigDef().setProperty( Mode.DEFAULT, "default", "process-default" ); pipe.getConfigDef().setProperty( Mode.DEFAULT, "replace", "pipe-default" ); pipe.getConfigDef().setProperty( Mode.REPLACE, "replace", "pipe-replace" ); pipe.getNodeConfigDef().setProperty( Mode.REPLACE, "default-node", "node-replace" ); pipe.getStepConfigDef().setProperty( Mode.DEFAULT, "replace", "process-default" ); pipe.getStepConfigDef().setProperty( Mode.REPLACE, "replace", "process-replace" ); pipe.getStepConfigDef().setProperty( Mode.DEFAULT, "default-node", "process-default" ); Tap sink = getPlatform().getTextFile( getOutputPath( "subassembly-configdef" ), SinkMode.REPLACE ); Flow flow = getPlatform().getFlowConnector().connect( source, sink, pipe ); flow.complete(); assertTrue( flow.resourceExists( sink ) ); } }
@Test public void testTapSourceConfigDef() throws IOException { getPlatform().copyFromLocal( inputFileNums20 ); Scheme scheme = getPlatform().getTestConfigDefScheme(); Tap source = getPlatform().getTap( scheme, inputFileNums20, SinkMode.KEEP ); // process -> after sink/sourceConfInit are called // default -> Wrapper for all cluster side calls source.getConfigDef().setProperty( Mode.DEFAULT, "default", "source-default" ); // steps on above value source.getStepConfigDef().setProperty( Mode.DEFAULT, "default", "process-default" ); source.getConfigDef().setProperty( Mode.DEFAULT, "replace", "source-default" ); source.getConfigDef().setProperty( Mode.REPLACE, "replace", "source-replace" ); source.getNodeConfigDef().setProperty( Mode.REPLACE, "default-node", "node-replace" ); source.getStepConfigDef().setProperty( Mode.DEFAULT, "replace", "process-default" ); source.getStepConfigDef().setProperty( Mode.REPLACE, "replace", "process-replace" ); source.getStepConfigDef().setProperty( Mode.DEFAULT, "default-node", "process-default" ); Pipe pipe = new Pipe( "test" ); pipe = new Each( pipe, new Insert( new Fields( "value" ), "nada" ), Fields.ALL ); Tap sink = getPlatform().getTextFile( getOutputPath( "tapsourceconfigdef" ), SinkMode.REPLACE ); Flow flow = getPlatform().getFlowConnector().connect( source, sink, pipe ); flow.complete(); assertTrue( flow.resourceExists( sink ) ); }
@Test public void testTapSinkConfigDef() throws IOException { getPlatform().copyFromLocal( inputFileNums20 ); Tap source = getPlatform().getTextFile( new Fields( "line" ), inputFileNums20, SinkMode.KEEP ); Pipe pipe = new Pipe( "test" ); pipe = new Each( pipe, new Insert( new Fields( "value" ), "nada" ), Fields.ALL ); Scheme scheme = getPlatform().getTestConfigDefScheme(); Tap sink = getPlatform().getTap( scheme, getOutputPath( "tapsinkconfigdef" ), SinkMode.REPLACE ); // process -> after sink/sourceConfInit are called // default -> Wrapper for all cluster side calls sink.getConfigDef().setProperty( Mode.DEFAULT, "default", "sink-default" ); // steps on above value sink.getStepConfigDef().setProperty( Mode.DEFAULT, "default", "process-default" ); sink.getConfigDef().setProperty( Mode.DEFAULT, "replace", "sink-default" ); sink.getConfigDef().setProperty( Mode.REPLACE, "replace", "sink-replace" ); sink.getNodeConfigDef().setProperty( Mode.REPLACE, "default-node", "node-replace" ); sink.getStepConfigDef().setProperty( Mode.DEFAULT, "replace", "process-default" ); sink.getStepConfigDef().setProperty( Mode.REPLACE, "replace", "process-replace" ); sink.getStepConfigDef().setProperty( Mode.DEFAULT, "default-node", "process-default" ); Flow flow = getPlatform().getFlowConnector().connect( source, sink, pipe ); flow.complete(); assertTrue( flow.resourceExists( sink ) ); }
@Test public void testPipeConfigDef() throws IOException { getPlatform().copyFromLocal( inputFileNums20 ); Tap source = getPlatform().getTextFile( new Fields( "line" ), inputFileNums20 ); Pipe pipe = new Pipe( "test" ); pipe = new Each( pipe, new IterateInsert( new Fields( "value" ), getPlatform().isDAG() ), Fields.ALL ); pipe.getConfigDef().setProperty( Mode.DEFAULT, "default", "pipe-default" ); // steps on above value pipe.getStepConfigDef().setProperty( Mode.DEFAULT, "default", "process-default" ); pipe.getConfigDef().setProperty( Mode.DEFAULT, "replace", "pipe-default" ); pipe.getConfigDef().setProperty( Mode.REPLACE, "replace", "pipe-replace" ); pipe.getNodeConfigDef().setProperty( Mode.REPLACE, "default-node", "node-replace" ); pipe.getStepConfigDef().setProperty( Mode.DEFAULT, "replace", "process-default" ); pipe.getStepConfigDef().setProperty( Mode.REPLACE, "replace", "process-replace" ); pipe.getStepConfigDef().setProperty( Mode.DEFAULT, "default-node", "process-default" ); Tap sink = getPlatform().getTextFile( getOutputPath( "configdef" ), SinkMode.REPLACE ); Flow flow = getPlatform().getFlowConnector().connect( source, sink, pipe ); flow.complete(); assertTrue( flow.resourceExists( sink ) ); }
@Test public void testPipeConfigDef() throws IOException { getPlatform().copyFromLocal( inputFileNums20 ); Tap source = getPlatform().getTextFile( new Fields( "line" ), inputFileNums20 ); Pipe pipe = new Pipe( "test" ); pipe = new Each( pipe, new IterateInsert( new Fields( "value" ), getPlatform().isDAG() ), Fields.ALL ); pipe.getConfigDef().setProperty( Mode.DEFAULT, "default", "pipe-default" ); // steps on above value pipe.getStepConfigDef().setProperty( Mode.DEFAULT, "default", "process-default" ); pipe.getConfigDef().setProperty( Mode.DEFAULT, "replace", "pipe-default" ); pipe.getConfigDef().setProperty( Mode.REPLACE, "replace", "pipe-replace" ); pipe.getNodeConfigDef().setProperty( Mode.REPLACE, "default-node", "node-replace" ); pipe.getStepConfigDef().setProperty( Mode.DEFAULT, "replace", "process-default" ); pipe.getStepConfigDef().setProperty( Mode.REPLACE, "replace", "process-replace" ); pipe.getStepConfigDef().setProperty( Mode.DEFAULT, "default-node", "process-default" ); Tap sink = getPlatform().getTextFile( getOutputPath( "configdef" ), SinkMode.REPLACE ); Flow flow = getPlatform().getFlowConnector().connect( source, sink, pipe ); flow.complete(); assertTrue( flow.resourceExists( sink ) ); }
@Test public void testTapSourceConfigDef() throws IOException { getPlatform().copyFromLocal( inputFileNums20 ); Scheme scheme = getPlatform().getTestConfigDefScheme(); Tap source = getPlatform().getTap( scheme, inputFileNums20, SinkMode.KEEP ); // process -> after sink/sourceConfInit are called // default -> Wrapper for all cluster side calls source.getConfigDef().setProperty( Mode.DEFAULT, "default", "source-default" ); // steps on above value source.getStepConfigDef().setProperty( Mode.DEFAULT, "default", "process-default" ); source.getConfigDef().setProperty( Mode.DEFAULT, "replace", "source-default" ); source.getConfigDef().setProperty( Mode.REPLACE, "replace", "source-replace" ); source.getNodeConfigDef().setProperty( Mode.REPLACE, "default-node", "node-replace" ); source.getStepConfigDef().setProperty( Mode.DEFAULT, "replace", "process-default" ); source.getStepConfigDef().setProperty( Mode.REPLACE, "replace", "process-replace" ); source.getStepConfigDef().setProperty( Mode.DEFAULT, "default-node", "process-default" ); Pipe pipe = new Pipe( "test" ); pipe = new Each( pipe, new Insert( new Fields( "value" ), "nada" ), Fields.ALL ); Tap sink = getPlatform().getTextFile( getOutputPath( "tapsourceconfigdef" ), SinkMode.REPLACE ); Flow flow = getPlatform().getFlowConnector().connect( source, sink, pipe ); flow.complete(); assertTrue( flow.resourceExists( sink ) ); }
@Test public void testTapSinkConfigDef() throws IOException { getPlatform().copyFromLocal( inputFileNums20 ); Tap source = getPlatform().getTextFile( new Fields( "line" ), inputFileNums20, SinkMode.KEEP ); Pipe pipe = new Pipe( "test" ); pipe = new Each( pipe, new Insert( new Fields( "value" ), "nada" ), Fields.ALL ); Scheme scheme = getPlatform().getTestConfigDefScheme(); Tap sink = getPlatform().getTap( scheme, getOutputPath( "tapsinkconfigdef" ), SinkMode.REPLACE ); // process -> after sink/sourceConfInit are called // default -> Wrapper for all cluster side calls sink.getConfigDef().setProperty( Mode.DEFAULT, "default", "sink-default" ); // steps on above value sink.getStepConfigDef().setProperty( Mode.DEFAULT, "default", "process-default" ); sink.getConfigDef().setProperty( Mode.DEFAULT, "replace", "sink-default" ); sink.getConfigDef().setProperty( Mode.REPLACE, "replace", "sink-replace" ); sink.getNodeConfigDef().setProperty( Mode.REPLACE, "default-node", "node-replace" ); sink.getStepConfigDef().setProperty( Mode.DEFAULT, "replace", "process-default" ); sink.getStepConfigDef().setProperty( Mode.REPLACE, "replace", "process-replace" ); sink.getStepConfigDef().setProperty( Mode.DEFAULT, "default-node", "process-default" ); Flow flow = getPlatform().getFlowConnector().connect( source, sink, pipe ); flow.complete(); assertTrue( flow.resourceExists( sink ) ); }