@Override
public Partition[] getPartitions(S config) {
    int id = config.getRddId();

    jobId = new JobID(jobTrackerId, id);

    Configuration conf = getHadoopConfig(config);
    JobContext jobContext = DeepSparkHadoopMapReduceUtil.newJobContext(conf, jobId);

    try {
        // One Spark partition per Hadoop input split.
        List<InputSplit> splits = inputFormat.getSplits(jobContext);

        Partition[] partitions = new Partition[splits.size()];
        for (int i = 0; i < splits.size(); i++) {
            partitions[i] = new NewHadoopPartition(id, i, splits.get(i));
        }

        return partitions;
    } catch (IOException | InterruptedException | RuntimeException e) {
        LOG.error("Impossible to calculate partitions: " + e.getMessage());
        throw new DeepGenericException("Impossible to calculate partitions", e);
    }
}
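// Hypothetical usage sketch (not taken from the source): driver-side partition
// discovery. Assumes an extractor instance "extractor" and a job config
// "deepJobConfig"; since each Hadoop InputSplit is wrapped in one
// NewHadoopPartition, the partition count equals the number of splits.
Partition[] partitions = extractor.getPartitions(deepJobConfig);
LOG.info("Computed " + partitions.length + " partitions for RDD " + deepJobConfig.getRddId());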
@Override
public void initSave(S config, T first, UpdateQueryBuilder queryBuilder) {
    int id = config.getRddId();
    int partitionIndex = config.getPartitionId();

    TaskAttemptID attemptId = DeepSparkHadoopMapReduceUtil
            .newTaskAttemptID(jobTrackerId, id, true, partitionIndex, 0);

    Configuration configuration = getHadoopConfig(config);
    hadoopAttemptContext = DeepSparkHadoopMapReduceUtil
            .newTaskAttemptContext(configuration, attemptId);

    try {
        // Obtain the Hadoop RecordWriter that subsequent writes for this task attempt go through.
        writer = outputFormat.getRecordWriter(hadoopAttemptContext);
    } catch (IOException | InterruptedException e) {
        throw new DeepGenericException(e);
    }
}
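// Hypothetical companion sketch (method names are illustrative, not from the
// source): once initSave has obtained the RecordWriter, saving a record and
// closing the writer would typically delegate to the standard Hadoop calls
// RecordWriter.write(key, value) and RecordWriter.close(context).
public void saveRecord(Object key, Object value) {
    try {
        writer.write(key, value);
    } catch (IOException | InterruptedException e) {
        throw new DeepGenericException(e);
    }
}

public void closeSave() {
    try {
        // Flush buffered output and release the writer for this task attempt.
        writer.close(hadoopAttemptContext);
    } catch (IOException | InterruptedException e) {
        throw new DeepGenericException(e);
    }
}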
@Override
public void initIterator(Partition dp, S config) {
    int id = config.getRddId();

    NewHadoopPartition split = (NewHadoopPartition) dp;

    TaskAttemptID attemptId = DeepSparkHadoopMapReduceUtil
            .newTaskAttemptID(jobTrackerId, id, true, split.index(), 0);

    Configuration configuration = getHadoopConfig(config);
    TaskAttemptContext hadoopAttemptContext = DeepSparkHadoopMapReduceUtil
            .newTaskAttemptContext(configuration, attemptId);

    try {
        // Create and initialize the Hadoop RecordReader for this partition's split.
        reader = inputFormat.createRecordReader(split.serializableHadoopSplit().value(), hadoopAttemptContext);
        reader.initialize(split.serializableHadoopSplit().value(), hadoopAttemptContext);
    } catch (IOException | InterruptedException e) {
        throw new DeepGenericException(e);
    }
}
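// Hypothetical companion sketch (method names are illustrative, not from the
// source): after initIterator, iteration over the partition typically wraps the
// Hadoop RecordReader. Note that nextKeyValue() both tests for and advances to
// the next pair, so it should be invoked exactly once per record.
public boolean moveToNextRecord() {
    try {
        return reader.nextKeyValue();
    } catch (IOException | InterruptedException e) {
        throw new DeepGenericException(e);
    }
}

public Object currentRecordValue() {
    try {
        return reader.getCurrentValue();
    } catch (IOException | InterruptedException e) {
        throw new DeepGenericException(e);
    }
}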