@Override
public void emit(final OK key, final OV value) {
    this.reduceQueue.add(new KeyValue<>(key, value));
}
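// A sketch of a reducer driving the emitter above. The reduce signature follows
// TinkerPop's MapReduce contract, but the word-count semantics and the bindings
// OK=String, OV=Long are illustrative assumptions, not the original source.
public void reduce(final String key, final Iterator<Long> values, final MapReduce.ReduceEmitter<String, Long> emitter) {
    long total = 0L;
    while (values.hasNext())
        total += values.next(); // fold the grouped values for this key
    emitter.emit(key, total);   // lands on reduceQueue via emit(OK, OV) above
}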
@Override
public void emit(final K key, final V value) {
    // With a reduce phase configured, group values by key for the reducer;
    // otherwise stream the pair straight to the map-output queue.
    if (this.doReduce)
        this.reduceMap.computeIfAbsent(key, k -> new ConcurrentLinkedQueue<>()).add(value);
    else
        this.mapQueue.add(new KeyValue<>(key, value));
}
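// A self-contained sketch of the same dual-mode pattern, making explicit the
// field types the method above implies; the class name DualModeEmitter and its
// constructor are illustrative assumptions (assumed imports: java.util.Queue,
// java.util.concurrent.ConcurrentHashMap). Concurrent collections are used
// because emit() may be called from parallel map workers.
public static final class DualModeEmitter<K, V> {
    private final boolean doReduce;
    private final ConcurrentHashMap<K, Queue<V>> reduceMap = new ConcurrentHashMap<>();
    private final Queue<KeyValue<K, V>> mapQueue = new ConcurrentLinkedQueue<>();

    public DualModeEmitter(final boolean doReduce) {
        this.doReduce = doReduce;
    }

    public void emit(final K key, final V value) {
        if (this.doReduce)
            this.reduceMap.computeIfAbsent(key, k -> new ConcurrentLinkedQueue<>()).add(value);
        else
            this.mapQueue.add(new KeyValue<>(key, value));
    }
}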
@Override
public KeyValue next() {
    try {
        if (this.available) {
            // A pair was already read ahead; hand it out and clear the flag.
            this.available = false;
            return new KeyValue<>(this.key.get(), this.value.get());
        } else {
            // Advance through the queued readers, closing each as it is exhausted.
            while (true) {
                if (this.readers.isEmpty())
                    throw new NoSuchElementException();
                if (this.readers.peek().next(this.key, this.value))
                    return new KeyValue<>(this.key.get(), this.value.get());
                else
                    this.readers.remove().close();
            }
        }
    } catch (final IOException e) {
        throw new IllegalStateException(e.getMessage(), e);
    }
}
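// The read-ahead flag used by next() implies a hasNext() of roughly this shape
// (an inferred sketch, not the original source): it buffers one pair into
// key/value and marks it available so next() can return it without re-reading.
@Override
public boolean hasNext() {
    if (this.available)
        return true;
    try {
        while (!this.readers.isEmpty()) {
            if (this.readers.peek().next(this.key, this.value)) {
                this.available = true; // pair buffered for the next() call
                return true;
            }
            this.readers.remove().close(); // this reader is exhausted
        }
        return false;
    } catch (final IOException e) {
        throw new IllegalStateException(e.getMessage(), e);
    }
}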
@Override
public <K, V> Iterator<KeyValue<K, V>> head(final String location, final String memoryKey, final Class readerClass, final int totalLines) {
    final Configuration configuration = new BaseConfiguration();
    configuration.setProperty(Constants.GREMLIN_HADOOP_INPUT_LOCATION, location);
    configuration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_READER, readerClass.getCanonicalName());
    try {
        // An InputRDD reads the memory RDD directly from the live Spark context.
        if (InputRDD.class.isAssignableFrom(readerClass)) {
            return IteratorUtils.map(((InputRDD) readerClass.getConstructor().newInstance()).readMemoryRDD(configuration, memoryKey, new JavaSparkContext(Spark.getContext())).take(totalLines).iterator(),
                    tuple -> new KeyValue(tuple._1(), tuple._2()));
        } else if (InputFormat.class.isAssignableFrom(readerClass)) {
            // A Hadoop InputFormat is adapted through InputFormatRDD.
            return IteratorUtils.map(new InputFormatRDD().readMemoryRDD(configuration, memoryKey, new JavaSparkContext(Spark.getContext())).take(totalLines).iterator(),
                    tuple -> new KeyValue(tuple._1(), tuple._2()));
        }
    } catch (final Exception e) {
        throw new IllegalArgumentException(e.getMessage(), e);
    }
    throw new IllegalArgumentException("The provided readerClass must be an " + InputFormat.class.getCanonicalName() + " or an " + InputRDD.class.getCanonicalName() + ": " + readerClass.getCanonicalName());
}
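// Hypothetical call site for head(): inspect the first five entries of a
// persisted memory value. The storage variable, the "output" location and the
// "counts" memory key are made-up examples; PersistedInputRDD is one InputRDD
// implementation that reads from the live Spark context.
final Iterator<KeyValue<String, Long>> entries = storage.head("output", "counts", PersistedInputRDD.class, 5);
while (entries.hasNext()) {
    final KeyValue<String, Long> entry = entries.next();
    System.out.println(entry.getKey() + " -> " + entry.getValue());
}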
@Override
public <K, V> Iterator<KeyValue<K, V>> writeMemoryRDD(final Configuration configuration, final String memoryKey, final JavaPairRDD<K, V> memoryRDD) {
    if (!configuration.getBoolean(Constants.GREMLIN_SPARK_PERSIST_CONTEXT, false))
        LOGGER.warn("The SparkContext should be persisted in order for the RDD to persist across jobs. To do so, set " + Constants.GREMLIN_SPARK_PERSIST_CONTEXT + " to true");
    if (!configuration.containsKey(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION))
        throw new IllegalArgumentException("There is no provided " + Constants.GREMLIN_HADOOP_OUTPUT_LOCATION + " to write the persisted RDD to");
    final String memoryRDDName = Constants.getMemoryLocation(configuration.getString(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION), memoryKey);
    Spark.removeRDD(memoryRDDName); // drop any stale RDD registered under the same name
    memoryRDD.setName(memoryRDDName)
            .persist(StorageLevel.fromString(configuration.getString(Constants.GREMLIN_SPARK_PERSIST_STORAGE_LEVEL, "MEMORY_ONLY")))
            .count(); // count() is an action that forces the RDD to be materialized and cached
    Spark.refresh(); // refresh promptly so Spark's GC does not evict the newly persisted RDD
    return IteratorUtils.map(memoryRDD.collect().iterator(), tuple -> new KeyValue<>(tuple._1(), tuple._2()));
}
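// Hypothetical configuration for a writeMemoryRDD() call; the property values
// and the outputRDD/memoryRDD variables are illustrative, not from the original
// source. Persisting the context is what keeps the named RDD alive for later
// jobs to read back.
final Configuration conf = new BaseConfiguration();
conf.setProperty(Constants.GREMLIN_SPARK_PERSIST_CONTEXT, true);
conf.setProperty(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION, "output");
conf.setProperty(Constants.GREMLIN_SPARK_PERSIST_STORAGE_LEVEL, "MEMORY_AND_DISK");
final Iterator<KeyValue<String, Long>> persisted = outputRDD.writeMemoryRDD(conf, "counts", memoryRDD);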