eu.stratosphere.api.java.record.operators.FileDataSink.setInput Java code examples

/**
 * Creates a FileDataSink with the provided {@link FileOutputFormat} implementation and the given name,
 * writing to the file indicated by the given path. It uses the given contract as its input.
 * <p>
 * Delegates to the {@code (f, filePath, name)} constructor and then registers the input
 * through {@code setInput}.
 *
 * @param f The class of the {@link FileOutputFormat} implementation used to encode the data.
 * @param filePath The path to the file to write the contents to.
 * @param input The contract to use as the input.
 * @param name The given name for the sink, used in plans, logs and progress messages.
 */
public FileDataSink(Class<? extends FileOutputFormat<Record>> f, String filePath, Operator<Record> input, String name) {
  this(f, filePath, name);
  setInput(input);
}

/**
 * Creates a FileDataSink with the provided {@link FileOutputFormat} implementation and the default name,
 * writing to the file indicated by the given path. It uses the given contract as its input.
 * <p>
 * Delegates to the {@code (f, filePath)} constructor and then registers the input
 * through {@code setInput}.
 *
 * @param f The {@link FileOutputFormat} implementation used to encode the data.
 * @param filePath The path to the file to write the contents to.
 * @param input The contract to use as the input.
 */
public FileDataSink(FileOutputFormat<Record> f, String filePath, Operator<Record> input) {
  this(f, filePath);
  setInput(input);
}

/**
 * Creates a FileDataSink with the provided {@link FileOutputFormat} implementation and the given name,
 * writing to the file indicated by the given path. It uses the given contract as its input.
 * <p>
 * Delegates to the {@code (f, filePath, name)} constructor and then registers the input
 * through {@code setInput}.
 *
 * @param f The {@link FileOutputFormat} implementation used to encode the data.
 * @param filePath The path to the file to write the contents to.
 * @param input The contract to use as the input.
 * @param name The given name for the sink, used in plans, logs and progress messages.
 */
public FileDataSink(FileOutputFormat<Record> f, String filePath, Operator<Record> input, String name) {
  this(f, filePath, name);
  setInput(input);
}

/**
 * Creates a FileDataSink with the provided {@link FileOutputFormat} implementation and the given name,
 * writing to the file indicated by the given path. It uses the given contracts as its input.
 * <p>
 * The list of inputs is checked with {@code Validate.notNull} and then combined into a single
 * input operator by {@code Operator.createUnionCascade} before being passed to {@code setInput}.
 *
 * @param f The {@link FileOutputFormat} implementation used to encode the data.
 * @param filePath The path to the file to write the contents to.
 * @param input The contracts to use as the input; must not be {@code null}.
 * @param name The given name for the sink, used in plans, logs and progress messages.
 * @deprecated This constructor will be removed in future versions. Use the {@link eu.stratosphere.api.common.operators.Union} operator instead.
 */
@Deprecated
public FileDataSink(FileOutputFormat<Record> f, String filePath, List<Operator<Record>> input, String name) {
  this(f, filePath, name);
  // The check has to follow the delegating constructor call, which must be the first statement.
  Validate.notNull(input, "The input must not be null.");
  setInput(Operator.createUnionCascade(input));
}

/**
 * Creates a FileDataSink with the provided {@link FileOutputFormat} implementation and the given name,
 * writing to the file indicated by the given path. It uses the given contracts as its input.
 * <p>
 * The list of inputs is checked with {@code Validate.notNull} and then combined into a single
 * input operator by {@code Operator.createUnionCascade} before being passed to {@code setInput}.
 *
 * @param f The class of the {@link FileOutputFormat} implementation used to encode the data.
 * @param filePath The path to the file to write the contents to.
 * @param input The contracts to use as the input; must not be {@code null}.
 * @param name The given name for the sink, used in plans, logs and progress messages.
 * @deprecated This constructor will be removed in future versions. Use the {@link eu.stratosphere.api.common.operators.Union} operator instead.
 */
@Deprecated
public FileDataSink(Class<? extends FileOutputFormat<Record>> f, String filePath, List<Operator<Record>> input, String name) {
  this(f, filePath, name);
  // The check has to follow the delegating constructor call, which must be the first statement.
  Validate.notNull(input, "The inputs must not be null.");
  setInput(Operator.createUnionCascade(input));
}

  /**
   * Assembles the TeraSort plan: a file source connected directly to a globally ordered file sink.
   *
   * @param args optional positional arguments: degree of parallelism, input path, output path;
   *             absent ones default to {@code 1}, {@code ""} and {@code ""} respectively
   * @return the plan named "TeraSort", rooted at the data sink
   * @throws IllegalArgumentException if the first argument cannot be parsed by
   *         {@link Integer#parseInt(String)} (raised as a {@link NumberFormatException})
   */
  @Override
  public Plan getPlan(String... args) throws IllegalArgumentException {
    // job parameters, each with a fallback for a missing argument
    int parallelism = 1;
    if (args.length > 0) {
      parallelism = Integer.parseInt(args[0]);
    }
    String inputPath = "";
    if (args.length > 1) {
      inputPath = args[1];
    }
    String outputPath = "";
    if (args.length > 2) {
      outputPath = args[2];
    }

    // reader: loads the input data and produces the key/value pairs
    FileDataSource dataSource = new FileDataSource(new TeraInputFormat(), inputPath, "Data Source");
    dataSource.setDegreeOfParallelism(parallelism);

    // writer: puts the sorted data back on disk, ordered ascending on field 0
    FileDataSink dataSink = new FileDataSink(new TeraOutputFormat(), outputPath, "Data Sink");
    dataSink.setDegreeOfParallelism(parallelism);
    dataSink.setGlobalOrder(new Ordering(0, TeraKey.class, Order.ASCENDING), new TeraDistribution());

    dataSink.setInput(dataSource);

    return new Plan(dataSink, "TeraSort");
  }
}

/**
 * Builds a plan that reads '|'-delimited records holding one {@code IntValue} in field 0 and
 * writes them back out under a global ascending order on that field.
 *
 * @param args optional positional arguments: degree of parallelism, input path, output path;
 *             absent ones default to {@code 1}, {@code ""} and {@code ""} respectively
 * @return an unnamed plan rooted at the CSV sink
 * @throws IllegalArgumentException if the first argument cannot be parsed by
 *         {@link Integer#parseInt(String)} (raised as a {@link NumberFormatException})
 */
@Override
public Plan getPlan(String... args) throws IllegalArgumentException {
  // program parameters, each with a fallback for a missing argument
  int parallelism = 1;
  if (args.length > 0) {
    parallelism = Integer.parseInt(args[0]);
  }
  String inPath = "";
  if (args.length > 1) {
    inPath = args[1];
  }
  String outPath = "";
  if (args.length > 2) {
    outPath = args[2];
  }

  // source: newline-separated records, '|'-separated fields, field 0 read as IntValue
  FileDataSource csvSource = new FileDataSource(CsvInputFormat.class, inPath);
  csvSource.setDegreeOfParallelism(parallelism);
  CsvInputFormat.configureRecordFormat(csvSource).recordDelimiter('\n').fieldDelimiter('|').field(IntValue.class, 0);

  // sink: same layout as the source, configured as lenient
  FileDataSink csvSink = new FileDataSink(CsvOutputFormat.class, outPath);
  csvSink.setDegreeOfParallelism(parallelism);
  CsvOutputFormat.configureRecordFormat(csvSink).recordDelimiter('\n').fieldDelimiter('|').lenient(true).field(IntValue.class, 0);

  // global order over the full int range
  Ordering ascendingOnFieldZero = new Ordering(0, IntValue.class, Order.ASCENDING);
  UniformIntegerDistribution fullIntRange = new UniformIntegerDistribution(Integer.MIN_VALUE, Integer.MAX_VALUE);
  csvSink.setGlobalOrder(ascendingOnFieldZero, fullIntRange);
  csvSink.setInput(csvSource);

  return new Plan(csvSink);
}

    .appendOrdering(2, IntValue.class, Order.DESCENDING),
  new TripleIntDistribution(Order.DESCENDING, Order.ASCENDING, Order.DESCENDING));
// Register source as the input that sink consumes.
sink.setInput(source);

/**
 * Builds the union test plan: two file sources are both added as inputs of one mapper, whose
 * output is written by a single sink.
 * <p>
 * Each source reads from {@code emptyInPath} when its config entry
 * ({@code UnionTest#Input1Path} / {@code UnionTest#Input2Path}) equals {@code "empty"}, and from
 * {@code inPath} otherwise. Sources and mapper take their degree of parallelism from
 * {@code UnionTest#NoSubtasks} (default 1); the sink always runs with parallelism 1.
 *
 * @return the plan rooted at the sink
 */
@SuppressWarnings({ "deprecation", "unchecked" })
@Override
protected Plan getTestJob() {
  // pick the regular or the empty input file for each source
  String firstPath = inPath;
  if (config.getString("UnionTest#Input1Path", "").equals("empty")) {
    firstPath = emptyInPath;
  }
  String secondPath = inPath;
  if (config.getString("UnionTest#Input2Path", "").equals("empty")) {
    secondPath = emptyInPath;
  }

  FileDataSource firstSource = new FileDataSource(new ContractITCaseInputFormat(), firstPath);
  DelimitedInputFormat.configureDelimitedFormat(firstSource).recordDelimiter('\n');
  firstSource.setDegreeOfParallelism(config.getInteger("UnionTest#NoSubtasks", 1));

  FileDataSource secondSource = new FileDataSource(new ContractITCaseInputFormat(), secondPath);
  DelimitedInputFormat.configureDelimitedFormat(secondSource).recordDelimiter('\n');
  secondSource.setDegreeOfParallelism(config.getInteger("UnionTest#NoSubtasks", 1));

  MapOperator unionMapper = MapOperator.builder(new TestMapper()).build();
  unionMapper.setDegreeOfParallelism(config.getInteger("UnionTest#NoSubtasks", 1));

  FileDataSink sink = new FileDataSink(new ContractITCaseOutputFormat(), resultPath);
  sink.setDegreeOfParallelism(1);

  // connect sink first, then attach both sources to the mapper
  sink.setInput(unionMapper);
  unionMapper.addInput(firstSource);
  unionMapper.addInput(secondSource);

  return new Plan(sink);
}

/**
 * Builds the map test plan: file source, then {@code TestMapper}, then file sink.
 * <p>
 * Source and mapper take their degree of parallelism from {@code MapTest#NoSubtasks}
 * (default 1); the sink always runs with parallelism 1.
 *
 * @return the plan rooted at the sink
 */
@Override
protected Plan getTestJob() {
  FileDataSource source = new FileDataSource(new ContractITCaseInputFormat(), inPath);
  DelimitedInputFormat.configureDelimitedFormat(source).recordDelimiter('\n');
  source.setDegreeOfParallelism(config.getInteger("MapTest#NoSubtasks", 1));

  MapOperator mapper = MapOperator.builder(new TestMapper()).build();
  mapper.setDegreeOfParallelism(config.getInteger("MapTest#NoSubtasks", 1));

  FileDataSink sink = new FileDataSink(new ContractITCaseOutputFormat(), resultPath);
  sink.setDegreeOfParallelism(1);

  // connect sink first, then attach the source to the mapper
  sink.setInput(mapper);
  mapper.setInput(source);

  return new Plan(sink);
}

// Register groupByReturnFlag as the input that result consumes.
result.setInput(groupByReturnFlag);

/**
 * Produces the job graph of the working job: file source, then {@code TestMapper}, then file
 * sink, compiled with a default parallelism of 4.
 *
 * @return the job graph generated from the optimized plan
 * @throws Exception declared by the signature
 */
@Override
protected JobGraph getJobGraph() throws Exception {
  // source -> (working) mapper -> sink
  FileDataSource source = new FileDataSource(new ContractITCaseInputFormat(), inputPath);
  MapOperator mapper = MapOperator.builder(TestMapper.class).build();
  FileDataSink sink = new FileDataSink(new ContractITCaseOutputFormat(), resultPath);

  // compose the working program, sink first
  sink.setInput(mapper);
  mapper.setInput(source);

  Plan workingPlan = new Plan(sink);
  workingPlan.setDefaultParallelism(4);

  // optimize the plan, then turn the optimized plan into a job graph
  OptimizedPlan optimized = new PactCompiler(new DataStatistics()).compile(workingPlan);
  return new NepheleJobGraphGenerator().compileJobGraph(optimized);
}

/**
 * Produces the job graph of the failing job: file source, then {@code FailingMapper}, then file
 * sink, compiled with a default parallelism of 4.
 *
 * @return the job graph generated from the optimized plan
 * @throws Exception declared by the signature
 */
@Override
protected JobGraph getFailingJobGraph() throws Exception {
  // source -> failing mapper -> sink
  FileDataSource source = new FileDataSource(new ContractITCaseInputFormat(), inputPath);
  MapOperator failingMapper = MapOperator.builder(FailingMapper.class).build();
  FileDataSink sink = new FileDataSink(new ContractITCaseOutputFormat(), resultPath);

  // compose the failing program, sink first
  sink.setInput(failingMapper);
  failingMapper.setInput(source);

  Plan failingPlan = new Plan(sink);
  failingPlan.setDefaultParallelism(4);

  // optimize the plan, then turn the optimized plan into a job graph
  OptimizedPlan optimized = new PactCompiler(new DataStatistics()).compile(failingPlan);
  return new NepheleJobGraphGenerator().compileJobGraph(optimized);
}

// Wire the operators from the end backwards: result consumes findShortestPaths, which takes
// pathsInput as its first input and concatPaths as its second.
result.setDegreeOfParallelism(numSubTasks);
result.setInput(findShortestPaths);
findShortestPaths.setFirstInput(pathsInput);
findShortestPaths.setSecondInput(concatPaths);

  .field(StringValue.class, 2);
// Wire the operators from the end backwards: triangles consumes closeTriads, which takes
// buildTriads as its first input and edges as its second.
triangles.setInput(closeTriads);
closeTriads.setSecondInput(edges);
closeTriads.setFirstInput(buildTriads);

// output runs with parallelism 1 and consumes testCross, which takes input_left as its first
// input and input_right as its second.
output.setDegreeOfParallelism(1);
output.setInput(testCross);
testCross.setFirstInput(input_left);
testCross.setSecondInput(input_right);

// output runs with parallelism 1 and consumes testMatcher, which takes input_left as its first
// input and input_right as its second.
output.setDegreeOfParallelism(1);
output.setInput(testMatcher);
testMatcher.setFirstInput(input_left);
testMatcher.setSecondInput(input_right);

/**
 * Produces the job graph of the reduce test: file source, then {@code TestReducer} keyed on
 * {@code StringValue} field 0, then file sink.
 * <p>
 * Source and reducer take their degree of parallelism from {@code ReduceTest#NoSubtasks}
 * (default 1); the sink always runs with parallelism 1. The reducer's local-strategy and
 * ship-strategy compiler hints are copied from {@code ReduceTest#LocalStrategy} and
 * {@code ReduceTest#ShipStrategy} (both default to the empty string).
 *
 * @return the job graph generated from the optimized plan
 * @throws Exception declared by the signature
 */
@Override
protected JobGraph getJobGraph() throws Exception {
  FileDataSource source = new FileDataSource(new ContractITCaseInputFormat(), inPath);
  DelimitedInputFormat.configureDelimitedFormat(source).recordDelimiter('\n');
  source.setDegreeOfParallelism(config.getInteger("ReduceTest#NoSubtasks", 1));

  ReduceOperator reducer = ReduceOperator.builder(new TestReducer(), StringValue.class, 0).build();
  reducer.setDegreeOfParallelism(config.getInteger("ReduceTest#NoSubtasks", 1));

  // pass the strategy hints from the test configuration on to the optimizer
  reducer.getParameters().setString(
      PactCompiler.HINT_LOCAL_STRATEGY, config.getString("ReduceTest#LocalStrategy", ""));
  reducer.getParameters().setString(
      PactCompiler.HINT_SHIP_STRATEGY, config.getString("ReduceTest#ShipStrategy", ""));

  FileDataSink sink = new FileDataSink(new ContractITCaseOutputFormat(), resultPath);
  sink.setDegreeOfParallelism(1);

  // connect sink first, then attach the source to the reducer
  sink.setInput(reducer);
  reducer.setInput(source);

  // optimize the plan, then turn the optimized plan into a job graph
  OptimizedPlan optimized = new PactCompiler(new DataStatistics()).compile(new Plan(sink));
  return new NepheleJobGraphGenerator().compileJobGraph(optimized);
}

/**
 * Builds the co-group test plan: a left and a right file source feed {@code TestCoGrouper},
 * keyed on {@code StringValue} field 0 of both sides, whose output is written by a single sink.
 * <p>
 * Sources and co-grouper take their degree of parallelism from {@code CoGroupTest#NoSubtasks}
 * (default 1); the sink always runs with parallelism 1. The co-grouper's local-strategy and
 * ship-strategy compiler hints are copied from {@code CoGroupTest#LocalStrategy} and
 * {@code CoGroupTest#ShipStrategy} (both default to the empty string).
 *
 * @return the plan rooted at the sink
 */
@Override
protected Plan getTestJob() {
  FileDataSource leftSource = new FileDataSource(new CoGroupTestInFormat(), leftInPath);
  DelimitedInputFormat.configureDelimitedFormat(leftSource).recordDelimiter('\n');
  leftSource.setDegreeOfParallelism(config.getInteger("CoGroupTest#NoSubtasks", 1));

  FileDataSource rightSource = new FileDataSource(new CoGroupTestInFormat(), rightInPath);
  DelimitedInputFormat.configureDelimitedFormat(rightSource).recordDelimiter('\n');
  rightSource.setDegreeOfParallelism(config.getInteger("CoGroupTest#NoSubtasks", 1));

  CoGroupOperator coGrouper = CoGroupOperator.builder(new TestCoGrouper(), StringValue.class, 0, 0).build();
  coGrouper.setDegreeOfParallelism(config.getInteger("CoGroupTest#NoSubtasks", 1));

  // pass the strategy hints from the test configuration on to the optimizer
  coGrouper.getParameters().setString(
      PactCompiler.HINT_LOCAL_STRATEGY, config.getString("CoGroupTest#LocalStrategy", ""));
  coGrouper.getParameters().setString(
      PactCompiler.HINT_SHIP_STRATEGY, config.getString("CoGroupTest#ShipStrategy", ""));

  FileDataSink sink = new FileDataSink(new CoGroupOutFormat(), resultPath);
  sink.setDegreeOfParallelism(1);

  // connect sink first, then attach both sources to the co-grouper
  sink.setInput(coGrouper);
  coGrouper.setFirstInput(leftSource);
  coGrouper.setSecondInput(rightSource);

  return new Plan(sink);
}

/**
 * Builds the broadcast-variable test plan from three space-delimited CSV sources, two joins and
 * two mappers, the second of which receives the first mapper's output as broadcast variable
 * {@code "z"}. The result is written by a sink with parallelism 1.
 *
 * @return the plan rooted at the sink
 */
@Override
protected Plan getTestJob() {
  // polynomials: M parameter sets a,b,c for second degree polynomials P(x) = ax^2 + bx + c, identified by id
  FileDataSource polynomials = new FileDataSource(new CsvInputFormat(), sc1Path);
  CsvInputFormat.configureRecordFormat(polynomials)
      .fieldDelimiter(' ')
      .field(StringValue.class, 0)
      .field(IntValue.class, 1)
      .field(IntValue.class, 2)
      .field(IntValue.class, 3);

  // xValues: N x values to be evaluated with the polynomial identified by id
  FileDataSource xValues = new FileDataSource(new CsvInputFormat(), sc2Path);
  CsvInputFormat.configureRecordFormat(xValues)
      .fieldDelimiter(' ')
      .field(StringValue.class, 0)
      .field(IntValue.class, 1);

  // yValues: N y values to be evaluated with the polynomial identified by id
  FileDataSource yValues = new FileDataSource(new CsvInputFormat(), sc3Path);
  CsvInputFormat.configureRecordFormat(yValues)
      .fieldDelimiter(' ')
      .field(StringValue.class, 0)
      .field(IntValue.class, 1);

  // first join: matches x and y values on id and emits (id, x, y) triples
  JoinOperator xyJoin = JoinOperator.builder(Jn1.class, StringValue.class, 0, 0)
      .input1(xValues)
      .input2(yValues)
      .build();

  // second join: matches polynomial and arguments by id, computes p = min(P(x),P(y)), emits (id, p) tuples
  JoinOperator evalJoin = JoinOperator.builder(Jn2.class, StringValue.class, 0, 0)
      .input1(xyJoin)
      .input2(polynomials)
      .build();

  // first mapper: selects (id, x, y) triples where x = y and broadcasts z (=x=y) to the second mapper
  MapOperator equalXyMapper = MapOperator.builder(Mp1.class)
      .input(xyJoin)
      .build();

  // second mapper: filters out all p values which can be divided by z
  MapOperator divisibleFilter = MapOperator.builder(Mp2.class)
      .setBroadcastVariable("z", equalXyMapper)
      .input(evalJoin)
      .build();

  FileDataSink sink = new FileDataSink(new ContractITCaseOutputFormat(), resultPath);
  sink.setDegreeOfParallelism(1);
  sink.setInput(divisibleFilter);

  return new Plan(sink);
}

Popular methods of FileDataSink

Popular in Java

Making http post requests using okhttp
orElseThrow (Optional)
Return the contained value, if present, otherwise throw an exception to be created by the provided supplier.
getExternalFilesDir (Context)
runOnUiThread (Activity)
FileOutputStream (java.io)
An output stream that writes bytes to a file. If the output file exists, it can be replaced or appen
FileReader (java.io)
A specialized Reader that reads from a file in the file system. All read requests made by calling me
BigDecimal (java.math)
An immutable arbitrary-precision signed decimal. A value is represented by an arbitrary-precision "unscaled value" and a scale.
Permission (java.security)
Legacy security code; do not use.
JarFile (java.util.jar)
JarFile is used to read jar entries and their associated data from jar files.
FlowLayout (java.awt)
A flow layout arranges components in a left-to-right flow, much like lines of text in a paragraph. F
Best plugins for Eclipse

How to use the setInput method in eu.stratosphere.api.java.record.operators.FileDataSink

Best Java code snippets using eu.stratosphere.api.java.record.operators.FileDataSink.setInput (Showing top 20 results out of 315)

How to use the setInput method in eu.stratosphere.api.java.record.operators.FileDataSink