TupleEntryIterator in = approxCountsTap.openForRead(CascadingUtil.get().getFlowProcess());
List<HyperLogLog> countParts = new LinkedList<HyperLogLog>();
TupleEntryIterator output = sink.openForRead(CascadingUtil.get().getFlowProcess());
System.out.println("Output tuples from flow:");
while (output.hasNext()) {
  System.out.println(output.next().getTuple());
}
output.close();
public static void main(String[] args) throws IOException {
  if (args.length != 1) {
    System.out.println("Usage: hadoop jar cascading_ext.job.jar com.liveramp.cascading_ext.example.BloomJoinExample <output dir>");
    return;
  }

  Map<String, Tap> sources = new HashMap<String, Tap>();
  sources.put("source1", ExampleFixtures.SOURCE_TAP_1);
  sources.put("source2", ExampleFixtures.SOURCE_TAP_2);

  String outputDir = args[0];
  Hfs sink = new Hfs(new SequenceFile(new Fields("field1", "field2", "field3", "field4")), outputDir);

  Pipe source1 = new Pipe("source1");
  Pipe source2 = new Pipe("source2");

  Pipe joined = new BloomJoin(source1, new Fields("field1"), source2, new Fields("field3"));

  CascadingUtil.get().getFlowConnector().connect("Example flow", sources, sink, joined).complete();

  // Take a look at the output tuples
  TupleEntryIterator output = sink.openForRead(CascadingUtil.get().getFlowProcess());
  System.out.println("Output tuples from flow:");
  while (output.hasNext()) {
    System.out.println(output.next().getTuple());
  }
}
public static void main(String[] args) throws IOException {
  if (args.length != 1) {
    System.out.println("Usage: hadoop jar cascading_ext.job.jar com.liveramp.cascading_ext.example.SimpleFlowExample <output dir>");
    return;
  }

  String outputDir = args[0];
  Hfs sink = new Hfs(new SequenceFile(new Fields("field1", "field2", "field3", "field4")), outputDir);

  Pipe source1 = new Pipe("source1");
  Pipe source2 = new Pipe("source2");

  Pipe joined = new CoGroup(source1, new Fields("field1"), source2, new Fields("field3"));

  Map<String, Tap> sources = new HashMap<String, Tap>();
  sources.put("source1", ExampleFixtures.SOURCE_TAP_1);
  sources.put("source2", ExampleFixtures.SOURCE_TAP_2);

  CascadingUtil.get().getFlowConnector().connect("Example flow", sources, sink, joined).complete();

  // Take a look at the output tuples
  TupleEntryIterator output = sink.openForRead(CascadingUtil.get().getFlowProcess());
  System.out.println("Output tuples from flow:");
  while (output.hasNext()) {
    System.out.println(output.next().getTuple());
  }
}
reader = temp.openForRead( new HadoopFlowProcess( jobConf ) );
public static void main(String[] args) throws IOException {
  if (args.length != 1) {
    System.out.println("Usage: hadoop jar cascading_ext.job.jar com.liveramp.cascading_ext.example.BloomJoinExampleWithoutCascadingUtil <output dir>");
    return;
  }

  String outputDir = args[0];
  Hfs sink = new Hfs(new SequenceFile(new Fields("field1", "field2", "field3", "field4")), outputDir);

  Pipe source1 = new Pipe("source1");
  Pipe source2 = new Pipe("source2");

  Pipe joined = new BloomJoin(source1, new Fields("field1"), source2, new Fields("field3"));

  Map<String, Tap> sources = new HashMap<String, Tap>();
  sources.put("source1", ExampleFixtures.SOURCE_TAP_1);
  sources.put("source2", ExampleFixtures.SOURCE_TAP_2);

  // set some default properties and set the flow step strategy
  Flow f = new HadoopFlowConnector(BloomProps.getDefaultProperties()).connect("Example BloomJoin", sources, sink, joined);
  f.setFlowStepStrategy(new BloomAssemblyStrategy());
  f.complete();

  // Take a look at the output tuples; use a plain HadoopFlowProcess here, since
  // this example deliberately avoids CascadingUtil
  TupleEntryIterator output = sink.openForRead(new HadoopFlowProcess());
  System.out.println("Output tuples from flow:");
  while (output.hasNext()) {
    System.out.println(output.next().getTuple());
  }
}
private static BloomFilter mergeBloomParts(String tapPath, long numBloomBits, long splitSize,
                                           int numBloomHashes, long numElems,
                                           HashFunctionFactory hashFactory) throws IOException {
  FixedSizeBitSet bitSet = new FixedSizeBitSet(numBloomBits);

  if (FileSystemHelper.getFS().exists(new Path(tapPath))) {
    Hfs tap = new Hfs(new SequenceFile(new Fields("split", "filter")), tapPath);
    TupleEntryIterator itr = tap.openForRead(CascadingUtil.get().getFlowProcess());
    while (itr.hasNext()) {
      TupleEntry cur = itr.next();
      long split = cur.getLong(0);
      FixedSizeBitSet curSet = new FixedSizeBitSet(splitSize, ((BytesWritable) cur.getObject(1)).getBytes());
      // OR this split's bits into the full-size filter at the split's offset
      for (long i = 0; i < curSet.numBits(); i++) {
        if (curSet.get(i)) {
          bitSet.set(split * splitSize + i);
        }
      }
    }
    itr.close();
  }

  return new BloomFilter(numBloomBits, numBloomHashes, bitSet, numElems, hashFactory);
}
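The merge loop above reassembles a distributed bloom filter by OR-ing each split's bits into the full-size bit set at offset split * splitSize. A minimal standalone sketch of the same idea, using java.util.BitSet instead of the cascading_ext FixedSizeBitSet (the class name and sizes here are illustrative, not part of the library):

import java.util.BitSet;

public class BitSetMergeSketch {

  // OR fixed-size partial bit sets into one filter-sized bit set,
  // placing each part at its split offset.
  static BitSet merge(BitSet[] parts, int splitSize, int totalBits) {
    BitSet merged = new BitSet(totalBits);
    for (int split = 0; split < parts.length; split++) {
      BitSet part = parts[split];
      for (int i = part.nextSetBit(0); i >= 0; i = part.nextSetBit(i + 1)) {
        merged.set(split * splitSize + i);
      }
    }
    return merged;
  }

  public static void main(String[] args) {
    BitSet a = new BitSet(8);
    a.set(1);
    a.set(3);
    BitSet b = new BitSet(8);
    b.set(0);
    // bits 1 and 3 of split 0, bit 0 of split 1 -> prints {1, 3, 8}
    System.out.println(merge(new BitSet[]{ a, b }, 8, 16));
  }
}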
@Test
public void testFlow() throws IOException {
  getPlatform().copyFromLocal( inputFileApache );

  String outputPath1 = getOutputPath( "flowTest1" );
  String outputPath2 = getOutputPath( "flowTest2" );
  String outputPath3 = getOutputPath( "flowTest3" );

  remove( outputPath1, true );
  remove( outputPath2, true );
  remove( outputPath3, true );

  JobConf defaultConf = (JobConf) ( (BaseHadoopPlatform) getPlatform() ).getConfiguration();

  JobConf conf1 = createJob( defaultConf, "mr1", InputData.inputFileApache, outputPath1 );
  JobConf conf2 = createJob( defaultConf, "mr2", outputPath1, outputPath2 );
  JobConf conf3 = createJob( defaultConf, "mr3", outputPath2, outputPath3 );

  MultiMapReduceFlow flow = new MultiMapReduceFlow( "mrflow", conf1, conf2, conf3 );

  validateLength( new Hfs( new TextLine(), InputData.inputFileApache ).openForRead( new HadoopFlowProcess( defaultConf ) ), 10 );

  flow.complete();

  validateLength( new Hfs( new TextLine(), outputPath1 ).openForRead( new HadoopFlowProcess( defaultConf ) ), 10 );

  Collection<Tap> sinks = flow.getSinks().values();
  assertEquals( 1, sinks.size() );

  String identifier = sinks.iterator().next().getIdentifier();
  assertEquals( "flowTest3", identifier.substring( identifier.lastIndexOf( '/' ) + 1 ) );
}
@Test
public void testFlow() throws IOException {
  getPlatform().copyFromLocal( inputFileApache );

  JobConf defaultConf = (JobConf) ( (BaseHadoopPlatform) getPlatform() ).getConfiguration();

  JobConf conf = new JobConf( defaultConf );
  conf.setJobName( "mrflow" );

  conf.setOutputKeyClass( LongWritable.class );
  conf.setOutputValueClass( Text.class );

  conf.setMapperClass( IdentityMapper.class );
  conf.setReducerClass( IdentityReducer.class );

  conf.setInputFormat( TextInputFormat.class );
  conf.setOutputFormat( TextOutputFormat.class );

  FileInputFormat.setInputPaths( conf, new Path( inputFileApache ) );
  String outputPath = getOutputPath( "flowTest" );
  FileOutputFormat.setOutputPath( conf, new Path( outputPath ) );

  Flow flow = new MapReduceFlow( "mrflow", conf, true );

  validateLength( new Hfs( new TextLine(), inputFileApache ).openForRead( new HadoopFlowProcess( defaultConf ) ), 10 );

  flow.complete();

  validateLength( new Hfs( new TextLine(), outputPath ).openForRead( new HadoopFlowProcess( defaultConf ) ), 10 );
}
@Test
public void testFlowLazy() throws IOException {
  getPlatform().copyFromLocal( inputFileApache );

  String outputPath1 = getOutputPath( "flowTest1" );
  String outputPath2 = getOutputPath( "flowTest2" );
  String outputPath3 = getOutputPath( "flowTest3" );

  remove( outputPath1, true );
  remove( outputPath2, true );
  remove( outputPath3, true );

  JobConf defaultConf = (JobConf) ( (BaseHadoopPlatform) getPlatform() ).getConfiguration();

  JobConf conf1 = createJob( defaultConf, "mr1", InputData.inputFileApache, outputPath1 );
  JobConf conf2 = createJob( defaultConf, "mr2", outputPath1, outputPath2 );
  JobConf conf3 = createJob( defaultConf, "mr3", outputPath2, outputPath3 );

  validateLength( new Hfs( new TextLine(), InputData.inputFileApache ).openForRead( new HadoopFlowProcess( defaultConf ) ), 10 );

  // start with a single step, then attach the remaining steps while the flow is running
  MultiMapReduceFlow flow = new MultiMapReduceFlow( "mrflow", conf1 );

  flow.start();

  Util.safeSleep( 3000 );

  flow.attachFlowStep( conf2 );

  Util.safeSleep( 3000 );

  flow.attachFlowStep( conf3 );

  flow.complete();

  validateLength( new Hfs( new TextLine(), outputPath1 ).openForRead( new HadoopFlowProcess( defaultConf ) ), 10 );

  Collection<Tap> sinks = flow.getSinks().values();
  assertEquals( 1, sinks.size() );

  String identifier = sinks.iterator().next().getIdentifier();
  assertEquals( "flowTest3", identifier.substring( identifier.lastIndexOf( '/' ) + 1 ) );
}
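Condensed from the test above, the lazy-attachment lifecycle: a MultiMapReduceFlow can start with a single step and accept further steps while it is already running, and complete() blocks until every attached step has finished. The JobConf names below are placeholders:

// Sketch only; firstConf and secondConf stand in for real JobConf instances.
MultiMapReduceFlow flow = new MultiMapReduceFlow( "mrflow", firstConf );
flow.start();                      // begins executing the first step immediately
flow.attachFlowStep( secondConf ); // steps may be attached while the flow runs
flow.complete();                   // blocks until all attached steps finish
// Attaching after complete() throws IllegalStateException (see testFlowLazyFail below).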
@Test
public void testHfsAsterisk() throws Exception {
  getPlatform().copyFromLocal( inputFileLower );
  getPlatform().copyFromLocal( inputFileUpper );

  Hfs sourceExists = new Hfs( new TextLine( new Fields( "offset", "line" ) ), InputData.inputPath + "*" );
  assertTrue( sourceExists.resourceExists( getPlatform().getFlowProcess() ) );

  TupleEntryIterator iterator = sourceExists.openForRead( getPlatform().getFlowProcess() );
  assertTrue( iterator.hasNext() );
  iterator.close();

  try {
    Hfs sourceNotExists = new Hfs( new TextLine( new Fields( "offset", "line" ) ), InputData.inputPath + "/blah/" );
    iterator = sourceNotExists.openForRead( getPlatform().getFlowProcess() );
    fail();
  }
  catch( IOException exception ) {
    // do nothing
  }
}
@Test
public void testHfsBracketAsterisk() throws Exception {
  getPlatform().copyFromLocal( inputFileLower );
  getPlatform().copyFromLocal( inputFileUpper );

  Hfs sourceExists = new Hfs( new TextLine( new Fields( "offset", "line" ) ), InputData.inputPath + "{*}" );
  assertTrue( sourceExists.resourceExists( getPlatform().getFlowProcess() ) );

  TupleEntryIterator iterator = sourceExists.openForRead( getPlatform().getFlowProcess() );
  assertTrue( iterator.hasNext() );
  iterator.close();

  try {
    Hfs sourceNotExists = new Hfs( new TextLine( new Fields( "offset", "line" ) ), InputData.inputPath + "/blah/" );
    iterator = sourceNotExists.openForRead( getPlatform().getFlowProcess() );
    fail();
  }
  catch( IOException exception ) {
    // do nothing
  }
}
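Both tests depend on Hfs accepting Hadoop glob syntax ("*" and "{*}") in source paths. For reference, a sketch of how the same patterns expand against the raw Hadoop FileSystem API (the input path is a placeholder):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

// List everything the glob matches, much as Hfs does when sourcing the tap.
FileSystem fs = FileSystem.get( new Configuration() );
FileStatus[] matches = fs.globStatus( new Path( "/some/input/path" + "*" ) );
if( matches != null ) {
  for( FileStatus status : matches ) {
    System.out.println( status.getPath() );
  }
}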
@Test(expected = IllegalStateException.class)
public void testFlowLazyFail() throws IOException {
  getPlatform().copyFromLocal( inputFileApache );

  String outputPath1 = getOutputPath( "flowTest1" );
  String outputPath2 = getOutputPath( "flowTest2" );

  remove( outputPath1, true );
  remove( outputPath2, true );

  JobConf defaultConf = (JobConf) ( (BaseHadoopPlatform) getPlatform() ).getConfiguration();

  JobConf conf1 = createJob( defaultConf, "mr1", InputData.inputFileApache, outputPath1 );
  JobConf conf2 = createJob( defaultConf, "mr2", outputPath1, outputPath2 );

  validateLength( new Hfs( new TextLine(), InputData.inputFileApache ).openForRead( new HadoopFlowProcess( defaultConf ) ), 10 );

  MultiMapReduceFlow flow = new MultiMapReduceFlow( "mrflow", conf1 );

  flow.complete();

  // attaching a step after the flow has completed must throw IllegalStateException
  flow.attachFlowStep( conf2 );
}