/**
 * Decides whether this instance applies to the given tuple entry.
 *
 * <p>The value stored under {@code index} is read as a string and handed to
 * {@code predictor.matches(...)}; that result is returned unchanged.
 *
 * @param tupleEntry entry whose {@code index} value is tested
 * @return the result reported by {@code predictor.matches} for that value
 */
public boolean applies( TupleEntry tupleEntry ) {
  final String candidate = tupleEntry.getString( index );
  return predictor.matches( candidate );
}
}
/**
 * Emits the authority part of the URL held in argument 0.
 *
 * <p>The argument is parsed with {@link URL}. On success a one-value tuple containing
 * {@code getAuthority()} is added to the output collector; when the string cannot be
 * parsed as a URL, nothing is emitted for this call.
 *
 * @param process the current flow process (not used here)
 * @param call    supplies the argument tuple and the output collector
 */
public void operate(FlowProcess process, FunctionCall call) {
  final String rawUrl = call.getArguments().getString(0);
  try {
    final URL parsed = new URL(rawUrl);
    final String authority = parsed.getAuthority();
    call.getOutputCollector().add(new Tuple(authority));
  } catch (MalformedURLException ignored) {
    // Best-effort: an unparseable value simply produces no output tuple.
    // NOTE(review): the failure is dropped without any trace - confirm that is intended.
  }
}
}
/**
 * Emits argument 0 as a one-value tuple holding its UTF-8 encoded bytes.
 *
 * @param process the current flow process (not used here)
 * @param call    supplies the argument tuple and the output collector
 * @throws RuntimeException wrapping {@link UnsupportedEncodingException} if the
 *                          {@code "UTF-8"} charset name is rejected by the runtime
 */
public void operate(FlowProcess process, FunctionCall call) {
  final String text = call.getArguments().getString(0);

  final byte[] encoded;
  try {
    encoded = text.getBytes("UTF-8");
  } catch (UnsupportedEncodingException e) {
    // Checked exception from the charset-name overload; rethrown unchecked with its cause.
    throw new RuntimeException(e);
  }

  call.getOutputCollector().add(new Tuple(encoded));
}
}
/**
 * Reports whether the current tuple should be removed.
 *
 * <p>A tuple is removed when the string value of its {@code "number"} argument is
 * contained in {@code partitions}.
 *
 * @param flowProcess the current flow process (not used here)
 * @param filterCall  supplies the argument tuple
 * @return the result of {@code partitions.contains(...)} for the {@code "number"} value
 */
@Override
public boolean isRemove( FlowProcess flowProcess, FilterCall filterCall ) {
  final String number = filterCall.getArguments().getString( "number" );
  return partitions.contains( number );
}
}
/**
 * Builds a key of the form {@code <arg0>/<arg1>-<arg2>} and emits its UTF-8 bytes.
 *
 * <p>Argument 0 and 1 are read as strings, argument 2 as an integer. The three are joined
 * with {@code "/"} and {@code "-"}, encoded as UTF-8 and added to the output collector as
 * a one-value tuple.
 *
 * @param process the current flow process (not used here)
 * @param call    supplies the argument tuple and the output collector
 * @throws RuntimeException wrapping {@link UnsupportedEncodingException} if the
 *                          {@code "UTF-8"} charset name is rejected by the runtime
 */
public void operate(FlowProcess process, FunctionCall call) {
  final String url = call.getArguments().getString(0);
  final String granularity = call.getArguments().getString(1);
  final Integer bucket = call.getArguments().getInteger(2);

  final String key = url + "/" + granularity + "-" + bucket;

  final byte[] keyBytes;
  try {
    keyBytes = key.getBytes("UTF-8");
  } catch (UnsupportedEncodingException e) {
    // Checked exception from the charset-name overload; rethrown unchecked with its cause.
    throw new RuntimeException(e);
  }

  call.getOutputCollector().add(new Tuple(keyBytes));
}
}
/**
 * Reports whether the current tuple should be removed.
 *
 * <p>A tuple is removed when the string value of its {@code "number"} argument is
 * contained in {@code partitions}.
 *
 * @param flowProcess the current flow process (not used here)
 * @param filterCall  supplies the argument tuple
 * @return the result of {@code partitions.contains(...)} for the {@code "number"} value
 */
@Override
public boolean isRemove( FlowProcess flowProcess, FilterCall filterCall ) {
  final String number = filterCall.getArguments().getString( "number" );
  return partitions.contains( number );
}
}
@Override public void operate( FlowProcess flowProcess, FunctionCall<Pair<Matcher, TupleEntry>> functionCall ) { // coerce to string String value = functionCall.getArguments().getString( 0 ); // make safe if( value == null ) value = ""; TupleEntry output = functionCall.getContext().getRhs(); Matcher matcher = functionCall.getContext().getLhs().reset( value ); if( replaceAll ) output.setString( 0, matcher.replaceAll( replacement ) ); else output.setString( 0, matcher.replaceFirst( replacement ) ); functionCall.getOutputCollector().add( output ); }
/**
 * Emits one tuple per regex match found in argument 0.
 *
 * <p>The context's left side is a reusable {@link Matcher}; its right side is the
 * {@code TupleEntry} that is overwritten with each matched text and added to the output
 * collector. A {@code null} argument is treated as the empty string.
 *
 * @param flowProcess  the current flow process (not used here)
 * @param functionCall supplies the argument tuple, the context pair and the output collector
 */
@Override
public void operate( FlowProcess flowProcess, FunctionCall<Pair<Matcher, TupleEntry>> functionCall ) {
  final String rawValue = functionCall.getArguments().getString( 0 );
  final String input = rawValue == null ? "" : rawValue;

  final Matcher matcher = functionCall.getContext().getLhs().reset( input );

  while( matcher.find() ) {
    final TupleEntry reusable = functionCall.getContext().getRhs();
    reusable.setString( 0, matcher.group() );
    functionCall.getOutputCollector().add( reusable );
  }
}
}
/**
 * Wraps the bytes of argument 0 in a {@code BytesWritable} and emits it as a one-value tuple.
 *
 * @param flowProcess  the current flow process (not used here)
 * @param functionCall supplies the argument tuple and the output collector
 */
public void operate( FlowProcess flowProcess, FunctionCall functionCall ) {
  final String text = functionCall.getArguments().getString( 0 );

  // NOTE(review): the no-arg getBytes() encodes with the JVM's default charset, so the
  // emitted bytes can differ between machines - confirm whether a fixed charset is wanted.
  final byte[] raw = text.getBytes();

  final BytesWritable writable = new BytesWritable( raw );
  functionCall.getOutputCollector().add( new Tuple( writable ) );
}
}
/**
 * Wraps the bytes of argument 0 in a {@code BytesWritable} and emits it as a one-value tuple.
 *
 * @param flowProcess  the current flow process (not used here)
 * @param functionCall supplies the argument tuple and the output collector
 */
public void operate( FlowProcess flowProcess, FunctionCall functionCall ) {
  final String text = functionCall.getArguments().getString( 0 );

  // NOTE(review): the no-arg getBytes() encodes with the JVM's default charset, so the
  // emitted bytes can differ between machines - confirm whether a fixed charset is wanted.
  final byte[] raw = text.getBytes();

  final BytesWritable writable = new BytesWritable( raw );
  functionCall.getOutputCollector().add( new Tuple( writable ) );
}
}
/**
 * Matches argument 0 against the pattern and delegates group extraction.
 *
 * <p>A {@code null} argument is treated as the empty string. The context's left side is a
 * reusable {@link Matcher}, its right side the output {@code TupleEntry}. When
 * {@code groups} is set, {@code onGivenGroups} handles the match; otherwise
 * {@code onFoundGroups} does.
 *
 * @param flowProcess  the current flow process (not used here)
 * @param functionCall supplies the argument tuple, the context pair and the output collector
 * @throws OperationException if the pattern is not found in the value
 */
@Override
public void operate( FlowProcess flowProcess, FunctionCall<Pair<Matcher, TupleEntry>> functionCall ) {
  final String rawValue = functionCall.getArguments().getString( 0 );
  final String value = rawValue == null ? "" : rawValue;

  final Matcher matcher = functionCall.getContext().getLhs().reset( value );
  final boolean found = matcher.find();

  if( !found ) {
    throw new OperationException( "could not match pattern: [" + getPatternString() + "] with value: [" + value + "]" );
  }

  final TupleEntry output = functionCall.getContext().getRhs();

  if( groups == null ) {
    onFoundGroups( functionCall, matcher, output );
  } else {
    onGivenGroups( functionCall, matcher, output );
  }
}
/**
 * Splits argument 0 with the context pattern and emits one tuple per piece.
 *
 * <p>A {@code null} argument is treated as the empty string. The context's left side is
 * the {@link Pattern} used for splitting; its right side is the {@code TupleEntry} that is
 * overwritten with each piece and added to the output collector.
 *
 * @param flowProcess  the current flow process (not used here)
 * @param functionCall supplies the argument tuple, the context pair and the output collector
 */
@Override
public void operate( FlowProcess flowProcess, FunctionCall<Pair<Pattern, TupleEntry>> functionCall ) {
  final String rawValue = functionCall.getArguments().getString( 0 );
  final String value = rawValue == null ? "" : rawValue;

  final String[] pieces = functionCall.getContext().getLhs().split( value );

  for( int i = 0; i < pieces.length; i++ ) {
    final TupleEntry reusable = functionCall.getContext().getRhs();
    reusable.setString( 0, pieces[ i ] );
    functionCall.getOutputCollector().add( reusable );
  }
}
}
/**
 * Emits one single-value tuple per URL returned by {@code parseSearchResult} for the
 * keyword in argument 0.
 *
 * @param flowProcess the current flow process (not used here)
 * @param funCall     supplies the argument tuple and the output collector
 */
@Override
public void operate(FlowProcess flowProcess, FunctionCall funCall) {
  final String keyword = funCall.getArguments().getString(0);
  final List<String> foundUrls = parseSearchResult(keyword);

  for (String foundUrl : foundUrls) {
    final Tuple row = new Tuple();
    row.add(foundUrl);
    funCall.getOutputCollector().add(row);
  }
}
@Override public void operate(@SuppressWarnings("rawtypes") FlowProcess flowProcess, FunctionCall<Context> functionCall) { String line = functionCall.getArguments().getString(0); String word = line; // or not ? functionCall.getOutputCollector().add(new Tuple(word)); functionCall.getOutputCollector().add(new Tuple(word)); }
/**
 * Downloads the URL in argument 1, extracts keywords from it and emits one tuple per keyword.
 *
 * <p>The work is delegated in sequence to {@code download}, {@code parse} and
 * {@code extractKeywords}; each resulting keyword becomes a single-value tuple.
 *
 * @param flowProcess the current flow process (not used here)
 * @param funCall     supplies the argument tuple and the output collector
 */
@Override
public void operate(FlowProcess flowProcess, FunctionCall funCall) {
  final String url = funCall.getArguments().getString(1);

  final String rawText = download(url);
  final String plainText = parse(rawText);
  final List<String> extracted = extractKeywords(plainText);

  for (String word : extracted) {
    final Tuple row = new Tuple();
    row.add(word);
    funCall.getOutputCollector().add(row);
  }
}
/**
 * Emits a two-value tuple built from arguments 0 and 1, replacing the first value with
 * {@code null} when it equals {@code number}.
 *
 * @param flowProcess  the current flow process (not used here)
 * @param functionCall supplies the argument tuple and the output collector
 */
@Override
public void operate( FlowProcess flowProcess, FunctionCall functionCall ) {
  final TupleEntry args = functionCall.getArguments();
  final int num = args.getInteger( 0 );
  final String chr = args.getString( 1 );

  // The configured number is blanked out; every other number passes through unchanged.
  final Tuple result = ( num == number )
      ? new Tuple( null, chr )
      : new Tuple( num, chr );

  functionCall.getOutputCollector().add( result );
}
}
/**
 * Emits a two-value tuple built from arguments 0 and 1, replacing the first value with
 * {@code null} when it equals {@code number}.
 *
 * @param flowProcess  the current flow process (not used here)
 * @param functionCall supplies the argument tuple and the output collector
 */
@Override
public void operate( FlowProcess flowProcess, FunctionCall functionCall ) {
  final TupleEntry args = functionCall.getArguments();
  final int num = args.getInteger( 0 );
  final String chr = args.getString( 1 );

  // The configured number is blanked out; every other number passes through unchanged.
  final Tuple result = ( num == number )
      ? new Tuple( null, chr )
      : new Tuple( num, chr );

  functionCall.getOutputCollector().add( result );
}
}
/**
 * Checks that a tuple holding the int {@code 1}, wrapped in an entry declared with
 * {@code Fields.UNKNOWN}, can be read back through each typed getter.
 */
@Test
public void testCoerceCanonicalUnknown() {
  final TupleEntry entry = new TupleEntry( Fields.UNKNOWN, new Tuple( 1 ) );

  // untyped access
  assertEquals( 1, entry.getObject( 0 ) );

  // integral getters
  assertEquals( 1, entry.getInteger( 0 ) );
  assertEquals( 1, entry.getShort( 0 ) );
  assertEquals( 1L, entry.getLong( 0 ) );

  // floating-point getters
  assertEquals( 1.0F, entry.getFloat( 0 ) );
  assertEquals( 1.0D, entry.getDouble( 0 ) );

  // string getter
  assertEquals( "1", entry.getString( 0 ) );
}
/**
 * Verifies that a text sink writes fields in the order it declares.
 *
 * <p>Each input line is split on whitespace into {@code first}, {@code second} and
 * {@code third}. The sink declares them as {@code second, first, third}, so the first
 * line read back is expected to be a lowercase letter, a digit and an uppercase letter,
 * separated by tabs.
 *
 * @throws IOException if copying the input, opening the sink or closing the iterator fails
 */
@Test
public void testSinkDeclaredFields() throws IOException {
  getPlatform().copyFromLocal( inputFileCross );

  Tap source = getPlatform().getTextFile( new Fields( "line" ), inputFileCross );

  Pipe pipe = new Pipe( "test" );
  pipe = new Each( pipe, new RegexSplitter( new Fields( "first", "second", "third" ), "\\s" ), Fields.ALL );

  Tap sink = getPlatform().getTextFile( new Fields( "line" ), new Fields( "second", "first", "third" ), getOutputPath( "declaredsinks" ), SinkMode.REPLACE );

  Flow flow = getPlatform().getFlowConnector().connect( source, sink, pipe );
  flow.complete();

  // NOTE(review): 37 is presumably the expected number of sink tuples - confirm against validateLength.
  validateLength( flow, 37, null );

  TupleEntryIterator iterator = flow.openSink();
  try {
    String line = iterator.next().getString( 0 );
    assertTrue( "not equal: wrong values", line.matches( "[a-z]\t[0-9]\t[A-Z]" ) );
  } finally {
    // Close even when next() or the assertion throws, so the sink is not left open.
    iterator.close();
  }
}
/**
 * Verifies that a text sink writes fields in the order it declares.
 *
 * <p>Each input line is split on whitespace into {@code first}, {@code second} and
 * {@code third}. The sink declares them as {@code second, first, third}, so the first
 * line read back is expected to be a lowercase letter, a digit and an uppercase letter,
 * separated by tabs.
 *
 * @throws IOException if copying the input, opening the sink or closing the iterator fails
 */
@Test
public void testSinkDeclaredFields() throws IOException {
  getPlatform().copyFromLocal( inputFileCross );

  Tap source = getPlatform().getTextFile( new Fields( "line" ), inputFileCross );

  Pipe pipe = new Pipe( "test" );
  pipe = new Each( pipe, new RegexSplitter( new Fields( "first", "second", "third" ), "\\s" ), Fields.ALL );

  Tap sink = getPlatform().getTextFile( new Fields( "line" ), new Fields( "second", "first", "third" ), getOutputPath( "declaredsinks" ), SinkMode.REPLACE );

  Flow flow = getPlatform().getFlowConnector().connect( source, sink, pipe );
  flow.complete();

  // NOTE(review): 37 is presumably the expected number of sink tuples - confirm against validateLength.
  validateLength( flow, 37, null );

  TupleEntryIterator iterator = flow.openSink();
  try {
    String line = iterator.next().getString( 0 );
    assertTrue( "not equal: wrong values", line.matches( "[a-z]\t[0-9]\t[A-Z]" ) );
  } finally {
    // Close even when next() or the assertion throws, so the sink is not left open.
    iterator.close();
  }
}