/**
 * Supplies the comparator to use for {@code byte[]} values.
 *
 * @param type the class token for {@code byte[]}; it is not consulted here
 * @return a newly created {@link BytesComparator} on every call
 */
@Override
public Comparator<byte[]> getComparator( Class<byte[]> type )
  {
  Comparator<byte[]> bytesComparator = new BytesComparator();

  return bytesComparator;
  }
} // closes the enclosing type, whose declaration lies outside this excerpt
/**
 * Supplies the comparator to use for {@code byte[]} values.
 *
 * @param type the class token for {@code byte[]}; it is not consulted here
 * @return a newly created {@link BytesComparator} on every call
 */
@Override
public Comparator<byte[]> getComparator( Class<byte[]> type )
  {
  Comparator<byte[]> bytesComparator = new BytesComparator();

  return bytesComparator;
  }
} // closes the enclosing type, whose declaration lies outside this excerpt
// Register a BytesComparator for the "value" field of valueFields.
// NOTE(review): presumably "value" carries raw byte[] data -- confirm against whatever produces the field.
valueFields.setComparator( "value", new BytesComparator() );
// NOTE(review): identical registration to the statement above; confirm whether both occurrences
// really belong to the same scope or whether this excerpt shows two separate call sites.
valueFields.setComparator( "value", new BytesComparator() );
@Test public void testSimpleGroupOnBytes() throws Exception { getPlatform().copyFromLocal( inputFileApache ); Tap source = new Hfs( new TextLine( new Fields( "offset", "line" ) ), inputFileApache ); Pipe pipe = new Pipe( "test" ); pipe = new Each( pipe, new Fields( "line" ), new RegexParser( new Fields( "ip" ), "^[^ ]*" ), new Fields( "ip" ) ); pipe = new Each( pipe, new InsertRawBytes( new Fields( "bytes" ), "inserted text as bytes", true, true ), Fields.ALL ); Fields bytes = new Fields( "bytes" ); bytes.setComparator( "bytes", new BytesComparator() ); pipe = new GroupBy( pipe, bytes ); pipe = new Every( pipe, new Count(), new Fields( "bytes", "count" ) ); Tap sink = new Hfs( new SequenceFile( Fields.ALL ), getOutputPath( "grouponbytes" ), SinkMode.REPLACE ); Map<Object, Object> properties = getProperties(); TupleSerializationProps.addSerialization( properties, BytesSerialization.class.getName() ); Flow flow = getPlatform().getFlowConnector( properties ).connect( source, sink, pipe ); flow.complete(); validateLength( flow, 10 ); // 10 unique counts }
@Test public void testSimpleGroupOnBytes() throws Exception { getPlatform().copyFromLocal( inputFileApache ); Tap source = new Hfs( new TextLine( new Fields( "offset", "line" ) ), inputFileApache ); Pipe pipe = new Pipe( "test" ); pipe = new Each( pipe, new Fields( "line" ), new RegexParser( new Fields( "ip" ), "^[^ ]*" ), new Fields( "ip" ) ); pipe = new Each( pipe, new InsertRawBytes( new Fields( "bytes" ), "inserted text as bytes", true, true ), Fields.ALL ); Fields bytes = new Fields( "bytes" ); bytes.setComparator( "bytes", new BytesComparator() ); pipe = new GroupBy( pipe, bytes ); pipe = new Every( pipe, new Count(), new Fields( "bytes", "count" ) ); Tap sink = new Hfs( new SequenceFile( Fields.ALL ), getOutputPath( "grouponbytes" ), SinkMode.REPLACE ); Map<Object, Object> properties = getProperties(); TupleSerializationProps.addSerialization( properties, BytesSerialization.class.getName() ); Flow flow = getPlatform().getFlowConnector( properties ).connect( source, sink, pipe ); flow.complete(); validateLength( flow, 10 ); // 10 unique counts }