@Override
public void importData(URI inputUri, VariantMetadata metadata, List<StudyConfiguration> studyConfigurations)
        throws StorageEngineException, IOException {
    Path input = Paths.get(inputUri.getPath());

    Map<String, LinkedHashMap<String, Integer>> samplesPositions = new HashMap<>();
    for (StudyConfiguration sc : studyConfigurations) {
        LinkedHashMap<String, Integer> map = StudyConfiguration.getSortedIndexedSamplesPosition(sc);
        samplesPositions.put(sc.getStudyName(), map);
        samplesPositions.put(String.valueOf(sc.getStudyId()), map);
    }

    VariantReader variantReader = new VariantAvroReader(input.toAbsolutePath().toFile(), samplesPositions);
    ProgressLogger progressLogger = new ProgressLogger("Loaded variants");
    ParallelTaskRunner.Task<Variant, Document> converterTask =
            new VariantToDocumentConverter(studyConfigurations, metadata, progressLogger);
    DataWriter<Document> writer = new MongoDBVariantDocumentDBWriter(variantsCollection);

    ParallelTaskRunner.Config config = ParallelTaskRunner.Config.builder()
            .setNumTasks(5)
            .setSorted(false)
            .setBatchSize(200)
            .build();
    ParallelTaskRunner<Variant, Document> ptr = new ParallelTaskRunner<>(variantReader, converterTask, writer, config);
    try {
        ptr.run();
    } catch (ExecutionException e) {
        throw new StorageEngineException("Error importing variants", e);
    }
}
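// Hedged illustration (not OpenCGA code): importData above wires a reader, a converter task, and a
// writer into a ParallelTaskRunner. The sketch below shows the same read -> transform-in-parallel
// -> write pattern using only JDK classes, so the batching and numTasks ideas are visible without
// the OpenCB commons API. All names here (SimplePipeline, run, the sample data) are hypothetical.
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.*;
import java.util.function.Function;

public class SimplePipeline {

    // Run a batched pipeline: split the source into batches, transform each batch on a pool of
    // numTasks workers, and hand the results to a single consumer (here: stdout).
    static <I, O> void run(List<I> source, Function<I, O> task, int numTasks, int batchSize)
            throws InterruptedException, ExecutionException {
        ExecutorService pool = Executors.newFixedThreadPool(numTasks);
        List<Future<List<O>>> futures = new ArrayList<>();

        for (int i = 0; i < source.size(); i += batchSize) {
            List<I> batch = source.subList(i, Math.min(i + batchSize, source.size()));
            futures.add(pool.submit(() -> {
                List<O> out = new ArrayList<>(batch.size());
                for (I item : batch) {
                    out.add(task.apply(item));    // the "converter task" applied per element
                }
                return out;
            }));
        }

        // Single consumer: collect results in submission order. A setSorted(false) configuration in
        // the real runner would relax this ordering guarantee.
        for (Future<List<O>> f : futures) {
            for (O o : f.get()) {
                System.out.println(o);            // stands in for the DataWriter
            }
        }
        pool.shutdown();
    }

    public static void main(String[] args) throws Exception {
        List<String> variants = List.of("1:1000:A:C", "1:2000:G:T", "2:500:T:A");
        // Transform each "variant" into a pseudo-document, using 2 tasks and batches of 2.
        run(variants, v -> "{_id: \"" + v + "\"}", 2, 2);
    }
}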
null, ParallelTaskRunner.Config.builder().setAbortOnFail(true)
        .setBatchSize(options.getInt(Options.LOAD_BATCH_SIZE.key(), Options.LOAD_BATCH_SIZE.defaultValue()))
        .setNumTasks(options.getInt(Options.LOAD_THREADS.key(), Options.LOAD_THREADS.defaultValue()))
        .build()

dbWriter, ParallelTaskRunner.Config.builder().setAbortOnFail(true)
        .setBatchSize(options.getInt(Options.LOAD_BATCH_SIZE.key(), Options.LOAD_BATCH_SIZE.defaultValue()))
        .setNumTasks(options.getInt(Options.LOAD_THREADS.key(), Options.LOAD_THREADS.defaultValue()))
        .build()
ParallelTaskRunner.Config config = ParallelTaskRunner.Config.builder().setNumTasks(numTasks).setBatchSize(batchSize).build();
ParallelTaskRunner<Document, String> runner = new ParallelTaskRunner<>(reader, tasks, writer, config);
try {
.setBatchSize(batchSize)
.setCapacity(capacity)
.setSorted(true)
.setBatchSize(1)
.setAbortOnFail(true)
.setSorted(false)
.setBatchSize(batchSize)
.setAbortOnFail(true)
.setSorted(false)
ParallelTaskRunner.Config config = ParallelTaskRunner.Config.builder().setNumTasks(numTasks).setBatchSize(batchSize).build();
ParallelTaskRunner runner = new ParallelTaskRunner<>(reader, tasks, writer, config);
try {
.setReadQueuePutTimeout(20 * 60)
.setNumTasks(loadThreads)
.setBatchSize(batchSize)
.setAbortOnFail(true)
.build();
if (isStageParallelWrite(options)) {
.setReadQueuePutTimeout(20 * 60)
.setNumTasks(loadThreads)
.setBatchSize(batchSize)
.setAbortOnFail(true)
.build();
if (isDirectLoadParallelWrite(options)) {
.setBatchSize(batchSize)
.setAbortOnFail(true)
.setSorted(false)
.build();
.setReadQueuePutTimeout(20 * 60)
.setNumTasks(loadThreads)
.setBatchSize(batchSize)
.setAbortOnFail(true)
.build();
try {
ParallelTaskRunner.Config config = ParallelTaskRunner.Config.builder().setNumTasks(1).setBatchSize(10).build();
.setBatchSize(1)
.setReadQueuePutTimeout(1000)
.build();
ParallelTaskRunner.Config config = ParallelTaskRunner.Config.builder().setNumTasks(1).setBatchSize(1).build();
ParallelTaskRunner<ImmutablePair<Long, List<Variant>>, VcfSliceProtos.VcfSlice> ptr =
        new ParallelTaskRunner<>(reader, task, null, config);
/**
 * Loads variant annotations from a specified file into the selected Variant DataBase.
 *
 * @param uri    URI of the annotation file
 * @param params Specific params.
 * @throws IOException            IOException thrown
 * @throws StorageEngineException if there is a problem creating or running the {@link ParallelTaskRunner}
 */
public void loadVariantAnnotation(URI uri, ObjectMap params) throws IOException, StorageEngineException {
    final int batchSize = params.getInt(DefaultVariantAnnotationManager.BATCH_SIZE, 100);
    final int numConsumers = params.getInt(DefaultVariantAnnotationManager.NUM_WRITERS, 6);

    ParallelTaskRunner.Config config = ParallelTaskRunner.Config.builder()
            .setNumTasks(numConsumers)
            .setBatchSize(batchSize)
            .setAbortOnFail(true)
            .setSorted(false).build();

    DataReader<VariantAnnotation> reader = newVariantAnnotationDataReader(uri);
    try {
        ProgressLogger progressLogger = new ProgressLogger("Loaded annotations: ", numAnnotationsToLoad.get());
        ParallelTaskRunner<VariantAnnotation, ?> ptr = buildLoadAnnotationParallelTaskRunner(reader, config, progressLogger, params);
        ptr.run();
    } catch (ExecutionException e) {
        throw new StorageEngineException("Error loading variant annotation", e);
    }
}
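// Hedged usage sketch for loadVariantAnnotation above. It assumes ObjectMap behaves like a
// Map<String, Object> with an append(...) convenience (as in opencb-commons) and that the
// BATCH_SIZE / NUM_WRITERS constants referenced inside the method are the expected parameter keys;
// the file path is hypothetical. Adjust to the actual API if these assumptions do not hold.
URI annotationFile = URI.create("file:///tmp/annotations.json.gz");   // hypothetical input
ObjectMap params = new ObjectMap()
        .append(DefaultVariantAnnotationManager.BATCH_SIZE, 200)      // annotations per batch
        .append(DefaultVariantAnnotationManager.NUM_WRITERS, 4);      // parallel loader tasks
loadVariantAnnotation(annotationFile, params);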
public ParallelTaskRunner<Variant, VcfSliceProtos.VcfSlice> createParallelRunner(int size, DataWriter<VcfSliceProtos.VcfSlice> collector)
        throws Exception {
    VcfVariantReader reader = VcfVariantReaderTest.createReader(size);
    Configuration conf = new Configuration();
    ArchiveTableHelper helper = new ArchiveTableHelper(conf, 1, new VariantFileMetadata("1", "1"));
    ParallelTaskRunner.Task<Variant, VcfSliceProtos.VcfSlice> task = new VariantHbaseTransformTask(helper);

    ParallelTaskRunner.Config config = ParallelTaskRunner.Config.builder()
            .setNumTasks(1)
            .setBatchSize(10)
            .setAbortOnFail(true)
            .setSorted(false).build();
    return new ParallelTaskRunner<>(reader, () -> task, collector, config);
}
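// Hedged usage sketch of the test helper above (everything except createParallelRunner is
// hypothetical). It assumes a DataWriter<VcfSliceProtos.VcfSlice> collector is supplied by the
// caller, e.g. an in-memory list-backed writer used for assertions in tests.
public void convertHundredVariants(DataWriter<VcfSliceProtos.VcfSlice> inMemoryCollector) throws Exception {
    ParallelTaskRunner<Variant, VcfSliceProtos.VcfSlice> runner = createParallelRunner(100, inMemoryCollector);
    runner.run();   // reads 100 variants, converts them to VcfSlice messages, writes them to the collector
}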