cascading.scheme

How to use cascading.scheme

Best Java code snippets using cascading.scheme (Showing top 20 results out of 315)
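
A Scheme defines how a Tap reads and writes data: the source* methods turn raw input records into Tuples, the sink* methods turn outgoing TupleEntry values back into output records, and the *Prepare/*Cleanup methods bracket each task, as the snippets below show. As a minimal sketch of where a Scheme plugs in (the SchemeWiring class name and paths are placeholders, assuming Cascading's Hadoop mode):

import cascading.scheme.hadoop.TextLine;
import cascading.tap.Tap;
import cascading.tap.hadoop.Hfs;
import cascading.tuple.Fields;

public class SchemeWiring {
  public static void main(String[] args) {
    // every Hfs tap is parameterized by a Scheme that does the (de)serialization;
    // TextLine sources ("offset", "line") tuples and sinks plain text lines
    Tap lines = new Hfs(new TextLine(new Fields("offset", "line")), "in/lines.txt"); // placeholder path
    System.out.println(lines.getScheme().getSourceFields()); // prints: ['offset', 'line']
  }
}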

origin: elastic/elasticsearch-hadoop

@Override
public void sourceCleanup(FlowProcess<JobConf> flowProcess, SourceCall<Object[], RecordReader> sourceCall) throws IOException {
  super.sourceCleanup(flowProcess, sourceCall);
  sourceCall.setContext(null);
}
origin: elastic/elasticsearch-hadoop

public void sinkCleanup(FlowProcess<JobConf> flowProcess, SinkCall<Object[], OutputCollector> sinkCall) throws IOException {
  super.sinkCleanup(flowProcess, sinkCall);
  sinkCall.setContext(null);
}
origin: elastic/elasticsearch-hadoop

  @SuppressWarnings("unchecked")
  @Override
  public void sink(FlowProcess<JobConf> flowProcess, SinkCall<Object[], OutputCollector> sinkCall) throws IOException {
    sinkCall.getOutput().collect(null, sinkCall);
  }
}
origin: elastic/elasticsearch-hadoop

@SuppressWarnings("unchecked")
@Override
public boolean source(FlowProcess<Properties> flowProcess, SourceCall<Object[], ScrollQuery> sourceCall) throws IOException {
  ScrollQuery query = sourceCall.getInput();
  boolean isJSON = (Boolean) sourceCall.getContext()[SRC_CTX_OUTPUT_JSON];
  TupleEntry entry = sourceCall.getIncomingEntry();
  FieldAlias alias = (FieldAlias) sourceCall.getContext()[SRC_CTX_ALIASES];
  // ... (remainder of the method truncated in the original snippet)
}
origin: elastic/elasticsearch-hadoop

  static Tuple coerceToString(SinkCall<?, ?> sinkCall) {
    return sinkCall.getOutgoingEntry().getTuple();
  }
}
origin: elastic/elasticsearch-hadoop

@Override
public void sourcePrepare(FlowProcess<JobConf> flowProcess, SourceCall<Object[], RecordReader> sourceCall) throws IOException {
  super.sourcePrepare(flowProcess, sourceCall);
  Object[] context = new Object[SRC_CTX_SIZE];
  context[SRC_CTX_KEY] = sourceCall.getInput().createKey();
  context[SRC_CTX_VALUE] = sourceCall.getInput().createValue();
  // as the tuple _might_ vary (some objects might be missing), we use a map rather than a collection
  Settings settings = loadSettings(flowProcess.getConfigCopy(), true);
  context[SRC_CTX_ALIASES] = CascadingUtils.alias(settings);
  context[SRC_CTX_OUTPUT_JSON] = settings.getOutputAsJson();
  sourceCall.setContext(context);
}
origin: elastic/elasticsearch-hadoop

@Override
public void sourceCleanup(FlowProcess<Properties> flowProcess, SourceCall<Object[], ScrollQuery> sourceCall) throws IOException {
  // in case of a source there's no local client so do all reporting here
  report(sourceCall.getInput().stats(), flowProcess);
  report(sourceCall.getInput().repository().stats(), flowProcess);
  sourceCall.getInput().close();
  sourceCall.setContext(null);
  // used for consistency
  cleanupClient(flowProcess);
}
origin: elastic/elasticsearch-hadoop

@Override
public void sourcePrepare(FlowProcess<Properties> flowProcess, SourceCall<Object[], ScrollQuery> sourceCall) throws IOException {
  super.sourcePrepare(flowProcess, sourceCall);
  Object[] context = new Object[SRC_CTX_SIZE];
  Settings settings = HadoopSettingsManager.loadFrom(flowProcess.getConfigCopy()).merge(props);
  context[SRC_CTX_ALIASES] = CascadingUtils.alias(settings);
  context[SRC_CTX_OUTPUT_JSON] = settings.getOutputAsJson();
  sourceCall.setContext(context);
}
origin: elastic/elasticsearch-hadoop

@Override
public void sinkPrepare(FlowProcess<JobConf> flowProcess, SinkCall<Object[], OutputCollector> sinkCall) throws IOException {
  super.sinkPrepare(flowProcess, sinkCall);
  Object[] context = new Object[SINK_CTX_SIZE];
  // the tuple is fixed, so we can just use a collection/index
  Settings settings = loadSettings(flowProcess.getConfigCopy(), false);
  context[SINK_CTX_ALIASES] = CascadingUtils.fieldToAlias(settings, getSinkFields());
  sinkCall.setContext(context);
}
origin: elastic/elasticsearch-hadoop

@SuppressWarnings("unchecked")
@Override
public Result write(SinkCall<Object[], ?> sinkCall, Generator generator) {
  Tuple tuple = CascadingUtils.coerceToString(sinkCall);
  // consider names (in case of aliases these are already applied)
  List<String> names = (List<String>) sinkCall.getContext()[SINK_CTX_ALIASES];
  generator.writeBeginObject();
  for (int i = 0; i < tuple.size(); i++) {
    String name = (i < names.size() ? names.get(i) : "tuple" + i);
    // filter out fields
    if (shouldKeep(generator.getParentPath(), name)) {
      generator.writeFieldName(name);
      Object object = tuple.getObject(i);
      Result result = jdkWriter.write(object, generator);
      if (!result.isSuccesful()) {
        if (object instanceof Writable) {
          return writableWriter.write((Writable) object, generator);
        }
        return Result.FAILED(object);
      }
    }
  }
  generator.writeEndObject();
  return Result.SUCCESFUL();
}
origin: cwensel/cascading

@Override
public void sourceCleanup( FlowProcess<? extends Config> flowProcess, SourceCall<Void, Input> sourceCall ) throws IOException
  {
  scheme.sourceCleanup( flowProcess, sourceCall );
  }
origin: cwensel/cascading

/**
 * Constructor Scheme creates a new Scheme instance.
 *
 * @param sourceFields of type Fields
 * @param sinkFields   of type Fields
 */
protected Scheme( Fields sourceFields, Fields sinkFields )
  {
  setSourceFields( sourceFields );
  setSinkFields( sinkFields );
  }
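
The snippets on this page follow a common lifecycle: the *Prepare methods allocate a per-task context, source/sink do the per-record work, and the *Cleanup methods release the context. A minimal sketch of a custom subclass wiring that lifecycle together (assuming Cascading 3.x local mode; SimpleLineScheme and its single "line" field are hypothetical, not taken from the snippets above):

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.PrintWriter;
import java.util.Properties;

import cascading.flow.FlowProcess;
import cascading.scheme.Scheme;
import cascading.scheme.SinkCall;
import cascading.scheme.SourceCall;
import cascading.tap.Tap;
import cascading.tuple.Fields;
import cascading.tuple.Tuple;

// Toy local-mode scheme: one text line per tuple.
public class SimpleLineScheme extends Scheme<Properties, InputStream, OutputStream, BufferedReader, PrintWriter> {

  public SimpleLineScheme() {
    super(new Fields("line"), new Fields("line"));
  }

  @Override
  public void sourceConfInit(FlowProcess<? extends Properties> flowProcess, Tap<Properties, InputStream, OutputStream> tap, Properties conf) {
    // nothing to configure for this toy format
  }

  @Override
  public void sinkConfInit(FlowProcess<? extends Properties> flowProcess, Tap<Properties, InputStream, OutputStream> tap, Properties conf) {
    // nothing to configure for this toy format
  }

  @Override
  public void sourcePrepare(FlowProcess<? extends Properties> flowProcess, SourceCall<BufferedReader, InputStream> sourceCall) throws IOException {
    // stash a reader in the call context, mirroring the Object[] contexts in the snippets above
    sourceCall.setContext(new BufferedReader(new InputStreamReader(sourceCall.getInput())));
  }

  @Override
  public boolean source(FlowProcess<? extends Properties> flowProcess, SourceCall<BufferedReader, InputStream> sourceCall) throws IOException {
    String line = sourceCall.getContext().readLine();
    if (line == null) {
      return false; // no more records
    }
    sourceCall.getIncomingEntry().setTuple(new Tuple(line));
    return true;
  }

  @Override
  public void sourceCleanup(FlowProcess<? extends Properties> flowProcess, SourceCall<BufferedReader, InputStream> sourceCall) throws IOException {
    sourceCall.getContext().close();
    sourceCall.setContext(null);
  }

  @Override
  public void sinkPrepare(FlowProcess<? extends Properties> flowProcess, SinkCall<PrintWriter, OutputStream> sinkCall) throws IOException {
    sinkCall.setContext(new PrintWriter(sinkCall.getOutput()));
  }

  @Override
  public void sink(FlowProcess<? extends Properties> flowProcess, SinkCall<PrintWriter, OutputStream> sinkCall) throws IOException {
    sinkCall.getContext().println(sinkCall.getOutgoingEntry().getTuple().getString(0));
  }

  @Override
  public void sinkCleanup(FlowProcess<? extends Properties> flowProcess, SinkCall<PrintWriter, OutputStream> sinkCall) throws IOException {
    sinkCall.getContext().flush();
    sinkCall.setContext(null);
  }
}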
origin: elastic/elasticsearch-hadoop

@SuppressWarnings("unchecked")
@Override
public boolean source(FlowProcess<JobConf> flowProcess, SourceCall<Object[], RecordReader> sourceCall) throws IOException {
  Object[] context = sourceCall.getContext();
  if (!sourceCall.getInput().next(context[SRC_CTX_KEY], context[SRC_CTX_VALUE])) {
    return false;
  }
  TupleEntry entry = sourceCall.getIncomingEntry();
  // ... (remainder of the method truncated in the original snippet)
origin: elastic/elasticsearch-hadoop

  @Override
  public String toString(Object field) {
    if (field instanceof SinkCall) {
      return ((SinkCall) field).getOutgoingEntry().toString();
    }
    return field.toString();
  }
}
origin: elastic/elasticsearch-hadoop

@Override
public void sinkPrepare(FlowProcess<Properties> flowProcess, SinkCall<Object[], Object> sinkCall) throws IOException {
  super.sinkPrepare(flowProcess, sinkCall);
  Object[] context = new Object[SINK_CTX_SIZE];
  Settings settings = HadoopSettingsManager.loadFrom(flowProcess.getConfigCopy()).merge(props);
  context[SINK_CTX_ALIASES] = CascadingUtils.fieldToAlias(settings, getSinkFields());
  sinkCall.setContext(context);
}
origin: elastic/elasticsearch-hadoop

@SuppressWarnings({ "rawtypes" })
@Override
protected Object extractField(Object target) {
  List<String> fieldNames = getFieldNames();
  for (int i = 0; i < fieldNames.size(); i++) {
    if (target instanceof SinkCall) {
      target = ((SinkCall) target).getOutgoingEntry().getObject(fieldNames.get(i));
      if (target == null) {
        return NOT_FOUND;
      }
    }
    else {
      return NOT_FOUND;
    }
  }
  return target;
}
origin: elastic/elasticsearch-hadoop

  static Tuple coerceToString(SinkCall<?, ?> sinkCall) {
    TupleEntry entry = sinkCall.getOutgoingEntry();
    Fields fields = entry.getFields();
    Tuple tuple = entry.getTuple();
    if (fields.hasTypes()) {
      Type types[] = new Type[fields.size()];
      for (int index = 0; index < fields.size(); index++) {
        Type type = fields.getType(index);
        if (type instanceof CoercibleType<?>) {
          types[index] = String.class;
        }
        else {
          types[index] = type;
        }
      }
      tuple = entry.getCoercedTuple(types);
    }
    return tuple;
  }
}
origin: elastic/elasticsearch-hadoop

  @Override
  public void convert(Object from, BytesArray to) {
    // expect a tuple holding one field - chararray or bytearray
    Assert.isTrue(from instanceof SinkCall,
        String.format("Unexpected object type, expecting [%s], given [%s]", SinkCall.class, from.getClass()));

    // handle common cases
    SinkCall sinkCall = (SinkCall) from;
    Tuple rawTuple = sinkCall.getOutgoingEntry().getTuple();

    if (rawTuple == null || rawTuple.isEmpty()) {
      to.bytes("{}");
      return;
    }
    Assert.isTrue(rawTuple.size() == 1, "When using JSON input, only one field is expected");

    // postpone the coercion
    Tuple tuple = CascadingUtils.coerceToString(sinkCall);
    super.convert(tuple.getObject(0), to);
  }
}
cascading.scheme

Most used classes

  • SinkCall
  • SourceCall
  • Scheme
  • TextLine (see the usage sketch after this list)
  • SequenceFile
    A SequenceFile is a type of cascading.scheme.Scheme: a flat file consisting of binary key/value pairs.
  • DelimitedParser
  • NullScheme
  • TextDelimited
  • WritableSequenceFile
  • ConcreteCall
  • TextDelimitedPlatformTest
  • WritableSequenceFilePlatformTest
  • FileFormat
  • CompressorScheme
  • CompressorScheme$Compressor
  • FieldTypeResolver
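
As a quick illustration of two classes from this list (a hypothetical sketch with placeholder paths and class name, assuming Cascading's Hadoop mode), TextLine can source plain text that SequenceFile then sinks as binary key/value records:

import java.util.Properties;

import cascading.flow.Flow;
import cascading.flow.hadoop.HadoopFlowConnector;
import cascading.pipe.Pipe;
import cascading.scheme.hadoop.SequenceFile;
import cascading.scheme.hadoop.TextLine;
import cascading.tap.Tap;
import cascading.tap.hadoop.Hfs;
import cascading.tuple.Fields;

public class TextToSequenceFile {
  public static void main(String[] args) {
    Fields fields = new Fields("offset", "line");

    // TextLine reads plain text; SequenceFile writes the same tuples in binary form
    Tap source = new Hfs(new TextLine(fields), "in/lines.txt"); // placeholder path
    Tap sink = new Hfs(new SequenceFile(fields), "out/lines.seq"); // placeholder path

    Pipe pipe = new Pipe("convert"); // identity pipe: the schemes do all the conversion
    Flow flow = new HadoopFlowConnector(new Properties()).connect(source, sink, pipe);
    flow.complete();
  }
}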