
How to use Combine$CombineFn in org.apache.beam.sdk.transforms

Best Java code snippets using org.apache.beam.sdk.transforms.Combine$CombineFn (Showing top 20 results out of 315)

origin: org.apache.beam/beam-runners-direct-java

try {
 accumulatorCoder =
   combineFn.getAccumulatorCoder(input.getPipeline().getCoderRegistry(), inputValueCoder);
} catch (CannotProvideCoderException e) {
 // Snippet truncated in the source; the closing and message below are reconstructed, not verbatim.
 throw new IllegalStateException("Could not determine an accumulator Coder", e);
}
origin: org.apache.beam/beam-runners-flink_2.10

@Override
public void add(InputT value) {
 try {
  org.apache.flink.api.common.state.ValueState<AccumT> state =
    flinkStateBackend.getPartitionedState(
      namespace.stringKey(),
      StringSerializer.INSTANCE,
      flinkStateDescriptor);
  AccumT current = state.value();
  if (current == null) {
   current = combineFn.createAccumulator();
  }
  current = combineFn.addInput(current, value);
  state.update(current);
 } catch (Exception e) {
  throw new RuntimeException("Error adding to state." , e);
 }
}
origin: org.apache.beam/beam-runners-direct-java

@Override
public void processElement(WindowedValue<KV<K, Iterable<AccumT>>> element) throws Exception {
 checkState(
   element.getWindows().size() == 1,
   "Expected inputs to %s to be in exactly one window. Got %s",
   MergeAccumulatorsAndExtractOutputEvaluator.class.getSimpleName(),
   element.getWindows().size());
 Iterable<AccumT> inputAccumulators = element.getValue().getValue();
 try {
  AccumT first = combineFn.createAccumulator();
  AccumT merged =
    combineFn.mergeAccumulators(
      Iterables.concat(
        Collections.singleton(first),
        inputAccumulators,
        Collections.singleton(combineFn.createAccumulator())));
  OutputT extracted = combineFn.extractOutput(merged);
  output.add(element.withValue(KV.of(element.getValue().getKey(), extracted)));
 } catch (Exception e) {
  throw UserCodeException.wrap(e);
 }
}
origin: org.apache.beam/beam-runners-flink_2.10

combineFn.getAccumulatorCoder(
  context.getInput(transform).getPipeline().getCoderRegistry(),
  inputCoder.getValueCoder());
origin: org.apache.beam/beam-runners-flink

combineFn.getAccumulatorCoder(
  context.getInput(transform).getPipeline().getCoderRegistry(),
  inputCoder.getValueCoder());
origin: org.apache.beam/beam-runners-flink_2.11

combineFn.getAccumulatorCoder(
  context.getInput(transform).getPipeline().getCoderRegistry(),
  inputCoder.getValueCoder());
origin: org.apache.beam/beam-runners-core-construction-java

  combineFn.getAccumulatorCoder(pipeline.getCoderRegistry(), input.getCoder()),
  getAccumulatorCoder(combineProto, RehydratedComponents.forComponents(componentsProto)));
assertEquals(
  combineFn,
origin: org.apache.beam/beam-sdks-java-core

/**
 * <b><i>For internal use only; no backwards-compatibility guarantees.</i></b>
 *
 * <p>Create a state spec for values that use a {@link CombineFn} to automatically merge multiple
 * {@code InputT}s into a single {@code OutputT}.
 *
 * <p>This determines the {@code Coder<AccumT>} from the given {@code Coder<InputT>}, and should
 * only be used to initialize static values.
 */
@Internal
public static <InputT, AccumT, OutputT>
  StateSpec<CombiningState<InputT, AccumT, OutputT>> combiningFromInputInternal(
    Coder<InputT> inputCoder, CombineFn<InputT, AccumT, OutputT> combineFn) {
 try {
  Coder<AccumT> accumCoder = combineFn.getAccumulatorCoder(STANDARD_REGISTRY, inputCoder);
  return combiningInternal(accumCoder, combineFn);
 } catch (CannotProvideCoderException e) {
  throw new IllegalArgumentException(
    "Unable to determine accumulator coder for "
      + combineFn.getClass().getSimpleName()
      + " from "
      + inputCoder,
    e);
 }
}
origin: org.apache.beam/beam-runners-direct-java

@Test
public void testAccumulatorCombiningStateWithUnderlying() throws CannotProvideCoderException {
 CopyOnAccessInMemoryStateInternals<String> underlying =
   CopyOnAccessInMemoryStateInternals.withUnderlying(key, null);
 CombineFn<Long, long[], Long> sumLongFn = Sum.ofLongs();
 StateNamespace namespace = new StateNamespaceForTest("foo");
 CoderRegistry reg = pipeline.getCoderRegistry();
 StateTag<CombiningState<Long, long[], Long>> stateTag =
   StateTags.combiningValue(
     "summer", sumLongFn.getAccumulatorCoder(reg, reg.getCoder(Long.class)), sumLongFn);
 GroupingState<Long, Long> underlyingValue = underlying.state(namespace, stateTag);
 assertThat(underlyingValue.read(), equalTo(0L));
 underlyingValue.add(1L);
 assertThat(underlyingValue.read(), equalTo(1L));
 CopyOnAccessInMemoryStateInternals<String> internals =
   CopyOnAccessInMemoryStateInternals.withUnderlying(key, underlying);
 GroupingState<Long, Long> copyOnAccessState = internals.state(namespace, stateTag);
 assertThat(copyOnAccessState.read(), equalTo(1L));
 copyOnAccessState.add(4L);
 assertThat(copyOnAccessState.read(), equalTo(5L));
 assertThat(underlyingValue.read(), equalTo(1L));
 GroupingState<Long, Long> reReadUnderlyingValue = underlying.state(namespace, stateTag);
 assertThat(underlyingValue.read(), equalTo(reReadUnderlyingValue.read()));
}
origin: org.apache.beam/beam-runners-flink_2.10

@Override
public void addAccum(AccumT accum) {
 try {
  org.apache.flink.api.common.state.ValueState<AccumT> state =
    flinkStateBackend.getPartitionedState(
     namespace.stringKey(),
     StringSerializer.INSTANCE,
     flinkStateDescriptor);
  AccumT current = state.value();
  if (current == null) {
   state.update(accum);
  } else {
   current = combineFn.mergeAccumulators(Lists.newArrayList(current, accum));
   state.update(current);
  }
 } catch (Exception e) {
  throw new RuntimeException("Error adding to state.", e);
 }
}
origin: org.apache.beam/beam-runners-flink_2.10

@Override
public OutputT read() {
 try {
  org.apache.flink.api.common.state.ValueState<AccumT> state =
    flinkStateBackend.getPartitionedState(
      namespace.stringKey(),
      StringSerializer.INSTANCE,
      flinkStateDescriptor);
  AccumT accum = state.value();
  if (accum != null) {
   return combineFn.extractOutput(accum);
  } else {
   return combineFn.extractOutput(combineFn.createAccumulator());
  }
 } catch (Exception e) {
  throw new RuntimeException("Error reading state.", e);
 }
}
origin: org.apache.beam/beam-runners-flink

@Override
public void addAccum(AccumT accum) {
 try {
  org.apache.flink.api.common.state.ValueState<AccumT> state =
    flinkStateBackend.getPartitionedState(
      namespace.stringKey(), StringSerializer.INSTANCE, flinkStateDescriptor);
  AccumT current = state.value();
  if (current == null) {
   state.update(accum);
  } else {
   current = combineFn.mergeAccumulators(Lists.newArrayList(current, accum));
   state.update(current);
  }
 } catch (Exception e) {
  throw new RuntimeException("Error adding to state.", e);
 }
}
origin: org.apache.beam/beam-runners-flink_2.11

@Override
public void addAccum(AccumT accum) {
 try {
  org.apache.flink.api.common.state.ValueState<AccumT> state =
    flinkStateBackend.getPartitionedState(
      namespace.stringKey(), StringSerializer.INSTANCE, flinkStateDescriptor);
  AccumT current = state.value();
  if (current == null) {
   state.update(accum);
  } else {
   current = combineFn.mergeAccumulators(Lists.newArrayList(current, accum));
   state.update(current);
  }
 } catch (Exception e) {
  throw new RuntimeException("Error adding to state.", e);
 }
}
origin: org.apache.beam/beam-runners-flink

@Override
public OutputT read() {
 try {
  org.apache.flink.api.common.state.ValueState<AccumT> state =
    flinkStateBackend.getPartitionedState(
      namespace.stringKey(), StringSerializer.INSTANCE, flinkStateDescriptor);
  AccumT accum = state.value();
  if (accum != null) {
   return combineFn.extractOutput(accum);
  } else {
   return combineFn.extractOutput(combineFn.createAccumulator());
  }
 } catch (Exception e) {
  throw new RuntimeException("Error reading state.", e);
 }
}
origin: org.apache.beam/beam-runners-flink_2.11

@Override
public void add(InputT value) {
 try {
  org.apache.flink.api.common.state.ValueState<AccumT> state =
    flinkStateBackend.getPartitionedState(
      namespace.stringKey(), StringSerializer.INSTANCE, flinkStateDescriptor);
  AccumT current = state.value();
  if (current == null) {
   current = combineFn.createAccumulator();
  }
  current = combineFn.addInput(current, value);
  state.update(current);
 } catch (Exception e) {
  throw new RuntimeException("Error adding to state.", e);
 }
}
origin: org.apache.beam/beam-sdks-java-core

@Override
public Object[] mergeAccumulators(Iterable<Object[]> accumulators) {
 Iterator<Object[]> iter = accumulators.iterator();
 if (!iter.hasNext()) {
  return createAccumulator();
 } else {
  // Reuses the first accumulator, and overwrites its values.
  // It is safe because {@code accum[i]} only depends on
  // the i-th component of each accumulator.
  Object[] accum = iter.next();
  for (int i = 0; i < combineFnCount; ++i) {
   accum[i] = combineFns.get(i).mergeAccumulators(new ProjectionIterable(accumulators, i));
  }
  return accum;
 }
}
origin: org.apache.beam/beam-runners-flink

@Override
public void add(InputT value) {
 try {
  org.apache.flink.api.common.state.ValueState<AccumT> state =
    flinkStateBackend.getPartitionedState(
      namespace.stringKey(), StringSerializer.INSTANCE, flinkStateDescriptor);
  AccumT current = state.value();
  if (current == null) {
   current = combineFn.createAccumulator();
  }
  current = combineFn.addInput(current, value);
  state.update(current);
 } catch (Exception e) {
  throw new RuntimeException("Error adding to state.", e);
 }
}
origin: org.apache.beam/beam-sdks-java-core

private static <InputT, AccumT, OutputT> List<AccumT> combineInputs(
  CombineFn<InputT, AccumT, OutputT> fn, Iterable<? extends Iterable<InputT>> shards) {
 List<AccumT> accumulators = new ArrayList<>();
 int maybeCompact = 0;
 for (Iterable<InputT> shard : shards) {
  AccumT accumulator = fn.createAccumulator();
  for (InputT elem : shard) {
   accumulator = fn.addInput(accumulator, elem);
  }
  if (maybeCompact++ % 2 == 0) {
   accumulator = fn.compact(accumulator);
  }
  accumulators.add(accumulator);
 }
 return accumulators;
}
origin: org.apache.beam/beam-runners-flink_2.11

@Override
public OutputT read() {
 try {
  org.apache.flink.api.common.state.ValueState<AccumT> state =
    flinkStateBackend.getPartitionedState(
      namespace.stringKey(), StringSerializer.INSTANCE, flinkStateDescriptor);
  AccumT accum = state.value();
  if (accum != null) {
   return combineFn.extractOutput(accum);
  } else {
   return combineFn.extractOutput(combineFn.createAccumulator());
  }
 } catch (Exception e) {
  throw new RuntimeException("Error reading state.", e);
 }
}
org.apache.beam.sdk.transforms.Combine$CombineFn

Javadoc

A CombineFn specifies how to combine a collection of input values of type InputT into a single output value of type OutputT. It does this via one or more intermediate mutable accumulator values of type AccumT.

The overall process to combine a collection of input InputT values into a single output OutputT value is as follows:

  1. The input InputT values are partitioned into one or more batches.
  2. For each batch, the #createAccumulator operation is invoked to create a fresh mutable accumulator value of type AccumT, initialized to represent the combination of zero values.
  3. For each input InputT value in a batch, the #addInput operation is invoked to add the value to that batch's accumulator AccumT value. The accumulator may just record the new value (e.g., if AccumT == List<InputT>), or may do work to represent the combination more compactly.
  4. The #mergeAccumulators operation is invoked to combine a collection of accumulator AccumT values into a single combined output accumulator AccumT value, once the merging accumulators have had all the input values in their batches added to them. This operation is invoked repeatedly, until there is only one accumulator value left.
  5. The #extractOutput operation is invoked on the final accumulator AccumT value to get the output OutputT value.

For example:

 
public class AverageFn extends CombineFn<Integer, AverageFn.Accum, Double> {
  public static class Accum implements Serializable {
    int sum = 0;
    int count = 0;

    @Override
    public boolean equals(Object other) {
      if (other == null) return false;
      if (other == this) return true;
      if (!(other instanceof Accum)) return false;

      Accum o = (Accum) other;
      if (this.sum != o.sum || this.count != o.count) {
        return false;
      } else {
        return true;
      }
    }

    @Override
    public int hashCode() {
      return Objects.hash(sum, count);
    }
  }

  @Override
  public Accum createAccumulator() {
    return new Accum();
  }

  @Override
  public Accum addInput(Accum accum, Integer input) {
    accum.sum += input;
    accum.count++;
    return accum;
  }

  @Override
  public Accum mergeAccumulators(Iterable<Accum> accums) {
    Accum merged = createAccumulator();
    for (Accum accum : accums) {
      merged.sum += accum.sum;
      merged.count += accum.count;
    }
    return merged;
  }

  @Override
  public Double extractOutput(Accum accum) {
    return ((double) accum.sum) / accum.count;
  }
}

PCollection<Integer> pc = ...;
PCollection<Double> average = pc.apply(Combine.globally(new AverageFn()));
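To connect the example to the five-step lifecycle above, here is a minimal sketch that drives a built-in CombineFn by hand (Sum.ofLongs(), whose accumulator is a long[], as the test snippets above also show). The explicit batching is purely illustrative; a runner normally chooses the partitioning itself.

import java.util.Arrays;
import org.apache.beam.sdk.transforms.Combine.CombineFn;
import org.apache.beam.sdk.transforms.Sum;

CombineFn<Long, long[], Long> fn = Sum.ofLongs();

// Steps 2-3: one fresh accumulator per batch, with each input added to it.
long[] batch1 = fn.createAccumulator();
for (long v : new long[] {1, 2, 3}) {
  batch1 = fn.addInput(batch1, v);
}
long[] batch2 = fn.createAccumulator();
for (long v : new long[] {4, 5}) {
  batch2 = fn.addInput(batch2, v);
}

// Step 4: merge the per-batch accumulators into one.
long[] merged = fn.mergeAccumulators(Arrays.asList(batch1, batch2));

// Step 5: extract the final output value.
System.out.println(fn.extractOutput(merged)); // 15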

Combining functions used by Combine.Globally, Combine.PerKey, Combine.GroupedValues, and PTransforms derived from them should be associative and commutative. Associativity is required because input values are first broken up into subgroups before being combined, and their intermediate results further combined, in an arbitrary tree structure. Commutativity is required because any order of the input values is ignored when breaking up input values into groups.
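As a quick illustration (plain Java arithmetic, not part of the javadoc): a combining operation based on subtraction would violate associativity, so the runner's arbitrary grouping would change the result.

// Subtraction is not associative: the grouping determines the answer.
int leftFirst = (10 - 3) - 2;  // 5
int rightFirst = 10 - (3 - 2); // 9
// A CombineFn built on subtraction would therefore produce
// nondeterministic output depending on how inputs are batched.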

Note on Data Encoding

Some form of data encoding is required when using custom types in a CombineFn which do not have well-known coders. The sample code above uses a custom accumulator which gets a coder by implementing java.io.Serializable. By doing this, we rely on the generic org.apache.beam.sdk.coders.CoderProvider, which can provide a coder for any java.io.Serializable type where applicable. In cases where java.io.Serializable is inefficient or inapplicable, there are two alternatives for encoding:

  • Default org.apache.beam.sdk.coders.CoderRegistry. For example, implement a coder class explicitly and use the @DefaultCoder annotation. See org.apache.beam.sdk.coders.CoderRegistry for the numerous ways in which to bind a type to a coder.
  • CombineFn-specific way. While extending CombineFn, override both #getAccumulatorCoder and #getDefaultOutputCoder (a sketch of this option follows this list).
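A sketch of the second option for the AverageFn example above, assuming its Accum fields from the javadoc. The anonymous CustomCoder body is illustrative, not Beam's own implementation (CustomCoder and VarIntCoder are real Beam classes).

import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import org.apache.beam.sdk.coders.Coder;
import org.apache.beam.sdk.coders.CoderRegistry;
import org.apache.beam.sdk.coders.CustomCoder;
import org.apache.beam.sdk.coders.VarIntCoder;

// Added inside AverageFn from the example above:
@Override
public Coder<Accum> getAccumulatorCoder(CoderRegistry registry, Coder<Integer> inputCoder) {
  return new CustomCoder<Accum>() {
    @Override
    public void encode(Accum value, OutputStream outStream) throws IOException {
      // Encode the two int fields directly instead of relying on Java serialization.
      VarIntCoder.of().encode(value.sum, outStream);
      VarIntCoder.of().encode(value.count, outStream);
    }

    @Override
    public Accum decode(InputStream inStream) throws IOException {
      Accum accum = new Accum();
      accum.sum = VarIntCoder.of().decode(inStream);
      accum.count = VarIntCoder.of().decode(inStream);
      return accum;
    }
  };
}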

Most used methods

  • getAccumulatorCoder
  • addInput
    Adds the given input value to the given accumulator, returning the new accumulator value. For efficiency, the input accumulator may be modified and returned.
  • createAccumulator
    Returns a new, mutable accumulator value, representing the accumulation of zero input values.
  • extractOutput
    Returns the output value that is the result of combining all the input values represented by the given accumulator.
  • mergeAccumulators
    Returns an accumulator representing the accumulation of all the input values accumulated in the merging accumulators.
  • compact
    Returns an accumulator that represents the same logical value as the input accumulator, but may have a more compact representation.
  • getOutputType
    Returns a TypeDescriptor capturing what is known statically about the output type of this CombineFn.
  • apply
    Applies this CombineFn to a collection of input values to produce a combined output value. Useful when testing the behavior of a CombineFn separately from a Combine transform.
  • defaultValue
    By default, returns the extracted output of an empty accumulator.
  • getDefaultOutputCoder
  • getIncompatibleGlobalWindowErrorMessage
  • populateDisplayData
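Of these, #apply is a handy shortcut for exercising a CombineFn outside a pipeline, e.g. in unit tests. A short sketch using the built-in Sum.ofLongs():

import java.util.Arrays;
import org.apache.beam.sdk.transforms.Combine.CombineFn;
import org.apache.beam.sdk.transforms.Sum;

CombineFn<Long, long[], Long> sum = Sum.ofLongs();
Long total = sum.apply(Arrays.asList(1L, 2L, 3L)); // runs the full accumulate/merge/extract cycle
// total == 6L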
