/**
 * Prepares {@code job} so that its map output is shuffled as Avro data.
 *
 * <p>The table type's schema is stored under {@code AvroJob.MAP_OUTPUT_SCHEMA},
 * the reflect flag is raised when the table type is not specific, the Avro key
 * comparator and the {@code AvroKey}/{@code AvroValue} wrapper classes are
 * installed, and {@code SafeAvroSerialization} is appended to
 * {@code io.serializations} when it is not already listed.
 *
 * @param job     the job whose configuration is updated
 * @param options optional grouping options; when non-null they are applied
 *                after the Avro defaults have been set
 */
@Override
public void configureShuffle(Job job, GroupingOptions options) {
  AvroTableType<K, V> avroTableType = (AvroTableType<K, V>) tableType;
  String mapOutputSchema = avroTableType.getSchema().toString();
  Configuration conf = job.getConfiguration();

  boolean specific = avroTableType.isSpecific();
  if (!specific) {
    conf.setBoolean(AvroJob.MAP_OUTPUT_IS_REFLECT, true);
  }
  conf.set(AvroJob.MAP_OUTPUT_SCHEMA, mapOutputSchema);

  job.setSortComparatorClass(AvroKeyComparator.class);
  job.setMapOutputKeyClass(AvroKey.class);
  job.setMapOutputValueClass(AvroValue.class);

  // Grouping options are applied after the defaults above.
  if (options != null) {
    options.configure(job);
  }

  Avros.configureReflectDataFactory(conf);

  String safeAvroName = SafeAvroSerialization.class.getName();
  Collection<String> registered = job.getConfiguration().getStringCollection("io.serializations");
  if (registered.contains(safeAvroName)) {
    return;
  }
  registered.add(safeAvroName);
  job.getConfiguration().setStrings("io.serializations", registered.toArray(new String[0]));
}
/**
 * Configures the shuffle of {@code job} for Writable-based key/value types.
 *
 * <p>Grouping options, when supplied, are applied first; afterwards the map
 * output key and value classes are taken from the serialization classes of the
 * table type's key and value types.
 *
 * @param job     the job to configure
 * @param options optional grouping options; ignored when {@code null}
 */
@Override
public void configureShuffle(Job job, GroupingOptions options) {
  if (options != null) {
    options.configure(job);
  }

  // Both casts happen before any map output class is set on the job.
  WritableType keyWritable = (WritableType) tableType.getKeyType();
  WritableType valueWritable = (WritableType) tableType.getValueType();

  job.setMapOutputKeyClass(keyWritable.getSerializationClass());
  job.setMapOutputValueClass(valueWritable.getSerializationClass());
}
// Closes the enclosing scope, whose opening lies outside this excerpt.
}
/**
 * Sets up the shuffle of {@code job} using the Writable serialization classes
 * of this table type.
 *
 * <p>Any supplied grouping options are applied to the job first; afterwards the
 * map output key class and value class are set from the key and value
 * {@code WritableType}s.
 *
 * @param job     the job being configured
 * @param options grouping options to apply, or {@code null} for none
 */
@Override
public void configureShuffle(Job job, GroupingOptions options) {
  boolean hasOptions = options != null;
  if (hasOptions) {
    options.configure(job);
  }

  // The key and value types are cast up front, before the job is touched again.
  WritableType writableKey = (WritableType) tableType.getKeyType();
  WritableType writableValue = (WritableType) tableType.getValueType();

  job.setMapOutputKeyClass(writableKey.getSerializationClass());
  job.setMapOutputValueClass(writableValue.getSerializationClass());
}
// Closes the enclosing scope, whose opening lies outside this excerpt.
}
/**
 * Prepares {@code job} so that its map output is shuffled as Avro data.
 *
 * <p>When the table type uses reflection the reflect flag is set on the
 * configuration; if it additionally uses specific records,
 * {@code Avros.checkCombiningSpecificAndReflectionSchemas()} is invoked first.
 * The schema, Avro key comparator and {@code AvroKey}/{@code AvroValue} wrapper
 * classes are then installed, grouping options are applied, and
 * {@code SafeAvroSerialization} is appended to {@code io.serializations} if it
 * is not already present.
 *
 * @param job     the job whose configuration is updated
 * @param options optional grouping options; when non-null they are applied
 *                after the Avro defaults have been set
 */
@Override
public void configureShuffle(Job job, GroupingOptions options) {
  AvroTableType<K, V> avroTableType = (AvroTableType<K, V>) tableType;
  String mapOutputSchema = avroTableType.getSchema().toString();
  Configuration conf = job.getConfiguration();

  boolean usesReflection = avroTableType.hasReflect();
  // hasSpecific() is only consulted for reflection-based table types.
  if (usesReflection && avroTableType.hasSpecific()) {
    Avros.checkCombiningSpecificAndReflectionSchemas();
  }
  if (usesReflection) {
    conf.setBoolean(AvroJob.MAP_OUTPUT_IS_REFLECT, true);
  }
  conf.set(AvroJob.MAP_OUTPUT_SCHEMA, mapOutputSchema);

  job.setSortComparatorClass(AvroKeyComparator.class);
  job.setMapOutputKeyClass(AvroKey.class);
  job.setMapOutputValueClass(AvroValue.class);

  // Grouping options are applied after the defaults above.
  if (options != null) {
    options.configure(job);
  }

  Avros.configureReflectDataFactory(conf);

  String safeAvroName = SafeAvroSerialization.class.getName();
  Collection<String> registered = job.getConfiguration().getStringCollection(
      "io.serializations");
  if (registered.contains(safeAvroName)) {
    return;
  }
  registered.add(safeAvroName);
  job.getConfiguration().setStrings("io.serializations", registered.toArray(new String[0]));
}
/**
 * Prepares {@code job} so that its map output is shuffled as Avro data.
 *
 * <p>When the table type uses reflection the reflect flag is set on the
 * configuration; if it additionally uses specific records,
 * {@code Avros.checkCombiningSpecificAndReflectionSchemas()} is invoked first.
 * The schema, Avro key comparator and {@code AvroKey}/{@code AvroValue} wrapper
 * classes are then installed, grouping options are applied, the
 * {@code AvroMode} derived from the table type configures the shuffle, and
 * {@code SafeAvroSerialization} is appended to {@code io.serializations} if it
 * is not already present.
 *
 * @param job     the job whose configuration is updated
 * @param options optional grouping options; when non-null they are applied
 *                after the Avro defaults have been set
 */
@Override
public void configureShuffle(Job job, GroupingOptions options) {
  AvroTableType<K, V> avroTableType = (AvroTableType<K, V>) tableType;
  String mapOutputSchema = avroTableType.getSchema().toString();
  Configuration conf = job.getConfiguration();

  boolean usesReflection = avroTableType.hasReflect();
  // hasSpecific() is only consulted for reflection-based table types.
  if (usesReflection && avroTableType.hasSpecific()) {
    Avros.checkCombiningSpecificAndReflectionSchemas();
  }
  if (usesReflection) {
    conf.setBoolean(AvroJob.MAP_OUTPUT_IS_REFLECT, true);
  }
  conf.set(AvroJob.MAP_OUTPUT_SCHEMA, mapOutputSchema);

  job.setSortComparatorClass(AvroKeyComparator.class);
  job.setMapOutputKeyClass(AvroKey.class);
  job.setMapOutputValueClass(AvroValue.class);

  // Grouping options are applied after the defaults above.
  if (options != null) {
    options.configure(job);
  }

  AvroMode.fromType(avroTableType)
      .withFactoryFromConfiguration(conf)
      .configureShuffle(conf);

  String safeAvroName = SafeAvroSerialization.class.getName();
  Collection<String> registered = job.getConfiguration().getStringCollection(
      "io.serializations");
  if (registered.contains(safeAvroName)) {
    return;
  }
  registered.add(safeAvroName);
  job.getConfiguration().setStrings("io.serializations", registered.toArray(new String[0]));
}
/**
 * Returns the partitioner, creating and caching it on first use.
 *
 * <p>On the first call the {@code ptype} is initialized from the runtime
 * configuration, a fresh {@code Job} is built from that configuration and
 * configured with the grouping options and the type's shuffle settings, and
 * the partitioner class named by the options is instantiated against the
 * job's configuration.
 *
 * @return the cached partitioner instance
 * @throws CrunchRuntimeException if creating the job fails with an
 *         {@code IOException}
 */
private Partitioner getPartitioner() {
  if (partitioner != null) {
    return partitioner;
  }
  try {
    ptype.initialize(runtimeContext.getConfiguration());
    Job shuffleJob = new Job(runtimeContext.getConfiguration());
    // Options are applied directly and then again through the ptype's shuffle setup.
    options.configure(shuffleJob);
    ptype.configureShuffle(shuffleJob, options);
    partitioner = ReflectionUtils.newInstance(
        options.getPartitionerClass(), shuffleJob.getConfiguration());
  } catch (IOException e) {
    throw new CrunchRuntimeException("Error configuring partitioner", e);
  }
  return partitioner;
}
// Closes the enclosing scope, whose opening lies outside this excerpt.
}
/**
 * Lazily builds the partitioner and returns it; later calls return the
 * instance created by the first call.
 *
 * <p>Construction initializes {@code ptype} with the runtime configuration,
 * creates a {@code Job} from that configuration, applies the grouping options
 * and the type's shuffle configuration to it, and finally instantiates the
 * options' partitioner class with the job's configuration.
 *
 * @return the partitioner held in the {@code partitioner} field
 * @throws CrunchRuntimeException wrapping any {@code IOException} raised while
 *         setting up the job
 */
private Partitioner getPartitioner() {
  boolean alreadyBuilt = partitioner != null;
  if (!alreadyBuilt) {
    try {
      ptype.initialize(runtimeContext.getConfiguration());
      Job configuredJob = new Job(runtimeContext.getConfiguration());
      // Same order as always: options first, then the ptype's shuffle configuration.
      options.configure(configuredJob);
      ptype.configureShuffle(configuredJob, options);
      partitioner = ReflectionUtils.newInstance(
          options.getPartitionerClass(), configuredJob.getConfiguration());
    } catch (IOException e) {
      throw new CrunchRuntimeException("Error configuring partitioner", e);
    }
  }
  return partitioner;
}
// Closes the enclosing scope, whose opening lies outside this excerpt.
}
/**
 * Configures the shuffle of {@code job} for Writable-based key/value types.
 *
 * <p>Grouping options, when supplied, are applied first. The map output key
 * and value classes are then set from the table type's key and value
 * {@code WritableType}s. If the options do not name a sort comparator (or no
 * options were given) and the key serialization class is
 * {@code TupleWritable}, {@code TupleWritable.Comparator} is installed as the
 * sort comparator.
 *
 * @param job     the job to configure
 * @param options optional grouping options; may be {@code null}
 */
@Override
public void configureShuffle(Job job, GroupingOptions options) {
  if (options != null) {
    options.configure(job);
  }

  WritableType keyWritable = (WritableType) tableType.getKeyType();
  WritableType valueWritable = (WritableType) tableType.getValueType();
  job.setMapOutputKeyClass(keyWritable.getSerializationClass());
  job.setMapOutputValueClass(valueWritable.getSerializationClass());

  // Only fall back to the tuple comparator when the caller has not chosen one.
  boolean comparatorSupplied = options != null && options.getSortComparatorClass() != null;
  if (!comparatorSupplied && TupleWritable.class.equals(keyWritable.getSerializationClass())) {
    job.setSortComparatorClass(TupleWritable.Comparator.class);
  }
}
// Closes the enclosing scope, whose opening lies outside this excerpt.
}
/**
 * Creates a {@code Shuffler} for the given key type and grouping options.
 *
 * <p>Without options, a {@code MapShuffler} over the default map for the key
 * type is returned. With options, a {@code Job} built from the pipeline's
 * configuration is configured by them, and then:
 * <ul>
 *   <li>if the key type's class is {@code Pair} and a grouping comparator is
 *       set, a {@code SecondarySortShuffler} over the map for the pair's first
 *       sub-type is returned;</li>
 *   <li>otherwise, if a sort comparator is set, a {@code MapShuffler} backed by
 *       a {@code TreeMap} ordered by that comparator is returned;</li>
 *   <li>otherwise the default {@code MapShuffler} is returned.</li>
 * </ul>
 *
 * @param keyType  the type of the keys being shuffled
 * @param options  grouping options, or {@code null} for the default behavior
 * @param pipeline the pipeline supplying the configuration for the job
 * @return a shuffler matching the requested grouping behavior
 * @throws IllegalStateException if the {@code Job} cannot be created
 */
public static <S, T> Shuffler<S, T> create(PType<S> keyType, GroupingOptions options,
    Pipeline pipeline) {
  Map<Object, Collection<T>> defaultMap = getMapForKeyType(keyType);
  if (options == null) {
    return new MapShuffler<S, T>(defaultMap);
  }

  Job job;
  try {
    job = new Job(pipeline.getConfiguration());
  } catch (IOException e) {
    throw new IllegalStateException("Could not create Job instance", e);
  }
  options.configure(job);

  boolean pairKeyWithGrouping = Pair.class.equals(keyType.getTypeClass())
      && options.getGroupingComparatorClass() != null;
  if (pairKeyWithGrouping) {
    PType<?> firstOfPair = keyType.getSubTypes().get(0);
    return new SecondarySortShuffler(getMapForKeyType(firstOfPair));
  }

  if (options.getSortComparatorClass() != null) {
    RawComparator comparator = ReflectionUtils.newInstance(
        options.getSortComparatorClass(), job.getConfiguration());
    Map<Object, Collection<T>> sortedMap = new TreeMap<Object, Collection<T>>(comparator);
    return new MapShuffler<S, T>(sortedMap, keyType);
  }

  return new MapShuffler<S, T>(defaultMap);
}