@Override
public void importData(URI inputUri, VariantMetadata metadata, List<StudyConfiguration> studyConfigurations)
        throws StorageEngineException, IOException {
    Path input = Paths.get(inputUri.getPath());

    // Index the sample positions of every study by both study name and study id
    Map<String, LinkedHashMap<String, Integer>> samplesPositions = new HashMap<>();
    for (StudyConfiguration sc : studyConfigurations) {
        LinkedHashMap<String, Integer> map = StudyConfiguration.getSortedIndexedSamplesPosition(sc);
        samplesPositions.put(sc.getStudyName(), map);
        samplesPositions.put(String.valueOf(sc.getStudyId()), map);
    }

    // Pipeline: Avro reader -> Variant-to-Document converter -> MongoDB writer
    VariantReader variantReader = new VariantAvroReader(input.toAbsolutePath().toFile(), samplesPositions);
    ProgressLogger progressLogger = new ProgressLogger("Loaded variants");
    ParallelTaskRunner.Task<Variant, Document> converterTask =
            new VariantToDocumentConverter(studyConfigurations, metadata, progressLogger);
    DataWriter<Document> writer = new MongoDBVariantDocumentDBWriter(variantsCollection);

    ParallelTaskRunner.Config config = ParallelTaskRunner.Config.builder()
            .setNumTasks(5)
            .setSorted(false)
            .setBatchSize(200)
            .build();
    ParallelTaskRunner<Variant, Document> ptr = new ParallelTaskRunner<>(variantReader, converterTask, writer, config);
    try {
        ptr.run();
    } catch (ExecutionException e) {
        throw new StorageEngineException("Error importing variants", e);
    }
}
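// For orientation: every snippet below follows the same reader -> task -> writer
// shape. A minimal sketch, assuming `someReader` and `someWriter` are hypothetical
// DataReader<Variant> / DataWriter<Variant> implementations (not the converters
// used above); the identity task mirrors the "batch -> batch" pattern shown below.
ParallelTaskRunner.Config sketchConfig = ParallelTaskRunner.Config.builder()
        .setNumTasks(4)        // concurrent task instances
        .setBatchSize(100)     // elements handed to each task call
        .setSorted(false)      // output order need not match input order
        .build();
ParallelTaskRunner<Variant, Variant> sketchPtr =
        new ParallelTaskRunner<>(someReader, batch -> batch, someWriter, sketchConfig);
try {
    sketchPtr.run();
} catch (ExecutionException e) {
    throw new StorageEngineException("Error running pipeline", e);
}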
// Two unrelated fragments, each the trailing arguments of a ParallelTaskRunner
// construction. First: end of an inline task, no writer (null), and the load config:
}, null, ParallelTaskRunner.Config.builder()
        .setAbortOnFail(true)
        .setBatchSize(options.getInt(Options.LOAD_BATCH_SIZE.key(), Options.LOAD_BATCH_SIZE.defaultValue()))
        .setNumTasks(options.getInt(Options.LOAD_THREADS.key(), Options.LOAD_THREADS.defaultValue()))
        .build()

// Second: pass-through task, a writer, and the same load config:
batch -> batch, dbWriter, ParallelTaskRunner.Config.builder()
        .setAbortOnFail(true)
        .setBatchSize(options.getInt(Options.LOAD_BATCH_SIZE.key(), Options.LOAD_BATCH_SIZE.defaultValue()))
        .setNumTasks(options.getInt(Options.LOAD_THREADS.key(), Options.LOAD_THREADS.defaultValue()))
        .build()
ParallelTaskRunner.Config config = ParallelTaskRunner.Config.builder()
        .setNumTasks(numTasks)
        .setBatchSize(batchSize)
        .build();
ParallelTaskRunner<Document, String> runner = new ParallelTaskRunner<>(reader, tasks, writer, config);
try {
ParallelTaskRunner.Config config = ParallelTaskRunner.Config.builder()
        .setNumTasks(numTasks)
        .setBatchSize(batchSize)
})).then(new VariantToVcfSliceConverterTask(progressLogger));

ParallelTaskRunner.Config config = ParallelTaskRunner.Config.builder()
        .setNumTasks(options.getInt(Options.TRANSFORM_THREADS.key(), 1))
        .setBatchSize(1)
long ts = System.currentTimeMillis();
ParallelTaskRunner.Config config = ParallelTaskRunner.Config.builder()
        .setNumTasks(numConsumers)
        .setBatchSize(batchSize)
ParallelTaskRunner.Config config = ParallelTaskRunner.Config.builder()
        .setNumTasks(numTasks)
        .setBatchSize(batchSize)
        .build();
ParallelTaskRunner runner = new ParallelTaskRunner<>(reader, tasks, writer, config);
try {
ParallelTaskRunner.Config config = ParallelTaskRunner.Config.builder()
        .setReadQueuePutTimeout(20 * 60)
        .setNumTasks(loadThreads)

ParallelTaskRunner.Config config = ParallelTaskRunner.Config.builder()
        .setReadQueuePutTimeout(20 * 60)
        .setNumTasks(loadThreads)

ParallelTaskRunner.Config config = ParallelTaskRunner.Config.builder()
        .setNumTasks(numThreads)
        .setBatchSize(batchSize)

ParallelTaskRunner.Config config = ParallelTaskRunner.Config.builder()
        .setReadQueuePutTimeout(20 * 60)
        .setNumTasks(loadThreads)
ParallelTaskRunner.Config config = ParallelTaskRunner.Config.builder()
        .setNumTasks(1)
        .setBatchSize(10)
        .build();
ParallelTaskRunner.Config config = ParallelTaskRunner.Config.builder()
        .setNumTasks(1) // Increasing numTasks may produce wrong results when writing the sampleIndex
        .setBatchSize(1)
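// For contrast with the single-task constraint above: most other snippets in
// this section run several tasks over unordered batches. A sketch, safe only
// when the downstream writer tolerates out-of-order batches (which, per the
// comment above, the sampleIndex writer does not):
ParallelTaskRunner.Config parallelConfig = ParallelTaskRunner.Config.builder()
        .setNumTasks(4)
        .setBatchSize(100)
        .setSorted(false)
        .setAbortOnFail(true)
        .build();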
ParallelTaskRunner.Config config = ParallelTaskRunner.Config.builder()
        .setNumTasks(1)
        .setBatchSize(1)
        .build();
// Writer is null: the runner does not write the task output anywhere, so any
// result of interest must be captured by the task itself
ParallelTaskRunner<ImmutablePair<Long, List<Variant>>, VcfSliceProtos.VcfSlice> ptr =
        new ParallelTaskRunner<>(reader, task, null, config);
private void stageVariants(StudyConfiguration study, int fileId, List<Variant> variants) throws Exception {
    String archiveTableName = engine.getArchiveTableName(study.getStudyId());
    ArchiveTableHelper.createArchiveTableIfNeeded(dbAdaptor.getGenomeHelper(), archiveTableName);

    // Create empty VariantFileMetadata
    VariantFileMetadata fileMetadata = new VariantFileMetadata(String.valueOf(fileId), String.valueOf(fileId));
    fileMetadata.setSampleIds(variants.get(0).getStudies().get(0).getOrderedSamplesName());
    dbAdaptor.getStudyConfigurationManager().updateVariantFileMetadata(String.valueOf(study.getStudyId()), fileMetadata);

    // Create dummy reader
    VariantSliceReader reader = getVariantSliceReader(variants, study.getStudyId(), fileId);

    // Task supplier
    Supplier<ParallelTaskRunner.Task<ImmutablePair<Long, List<Variant>>, VcfSliceProtos.VcfSlice>> taskSupplier = () -> {
        VariantToVcfSliceConverter converter = new VariantToVcfSliceConverter();
        return list -> {
            System.out.println("list.size() = " + list.size());
            List<VcfSliceProtos.VcfSlice> vcfSlice = new ArrayList<>(list.size());
            for (ImmutablePair<Long, List<Variant>> pair : list) {
                vcfSlice.add(converter.convert(pair.getRight(), pair.getLeft().intValue()));
            }
            return vcfSlice;
        };
    };

    // Writer
    VariantHBaseArchiveDataWriter writer = new VariantHBaseArchiveDataWriter(
            dbAdaptor.getArchiveHelper(study.getStudyId(), fileId), archiveTableName, dbAdaptor.getHBaseManager());

    ParallelTaskRunner.Config config = ParallelTaskRunner.Config.builder().setNumTasks(1).build();
    ParallelTaskRunner<ImmutablePair<Long, List<Variant>>, VcfSliceProtos.VcfSlice> ptr =
            new ParallelTaskRunner<>(reader, taskSupplier, writer, config);

    // Execute stage
    System.out.println("Stage start!");
    ptr.run();
    System.out.println("Stage finished!");
}
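// Note on the supplier form used above: the task is provided through a Supplier,
// presumably so that each task thread gets its own (non-shared) converter
// instance when numTasks > 1. A sketch reusing the reader/taskSupplier/writer
// from stageVariants with several tasks (hypothetical here, since the stage
// above deliberately uses a single task):
ParallelTaskRunner.Config multiTaskConfig = ParallelTaskRunner.Config.builder()
        .setNumTasks(4)
        .build();
ParallelTaskRunner<ImmutablePair<Long, List<Variant>>, VcfSliceProtos.VcfSlice> parallelPtr =
        new ParallelTaskRunner<>(reader, taskSupplier, writer, multiTaskConfig);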
/**
 * Loads variant annotations from a specified file into the selected variant database.
 *
 * @param uri    URI of the annotation file
 * @param params Specific params.
 * @throws IOException            IOException thrown
 * @throws StorageEngineException if there is a problem creating or running the {@link ParallelTaskRunner}
 */
public void loadVariantAnnotation(URI uri, ObjectMap params) throws IOException, StorageEngineException {
    final int batchSize = params.getInt(DefaultVariantAnnotationManager.BATCH_SIZE, 100);
    final int numConsumers = params.getInt(DefaultVariantAnnotationManager.NUM_WRITERS, 6);

    ParallelTaskRunner.Config config = ParallelTaskRunner.Config.builder()
            .setNumTasks(numConsumers)
            .setBatchSize(batchSize)
            .setAbortOnFail(true)
            .setSorted(false)
            .build();

    DataReader<VariantAnnotation> reader = newVariantAnnotationDataReader(uri);
    try {
        ProgressLogger progressLogger = new ProgressLogger("Loaded annotations: ", numAnnotationsToLoad.get());
        ParallelTaskRunner<VariantAnnotation, ?> ptr =
                buildLoadAnnotationParallelTaskRunner(reader, config, progressLogger, params);
        ptr.run();
    } catch (ExecutionException e) {
        throw new StorageEngineException("Error loading variant annotation", e);
    }
}
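// Hypothetical call site for the method above; BATCH_SIZE and NUM_WRITERS are
// the two keys the method reads, and the file URI is purely illustrative.
ObjectMap params = new ObjectMap()
        .append(DefaultVariantAnnotationManager.BATCH_SIZE, 200)
        .append(DefaultVariantAnnotationManager.NUM_WRITERS, 4);
loadVariantAnnotation(URI.create("file:///tmp/annotations.json.gz"), params);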
public ParallelTaskRunner<Variant, VcfSliceProtos.VcfSlice> createParallelRunner(
        int size, DataWriter<VcfSliceProtos.VcfSlice> collector) throws Exception {
    VcfVariantReader reader = VcfVariantReaderTest.createReader(size);
    Configuration conf = new Configuration();
    ArchiveTableHelper helper = new ArchiveTableHelper(conf, 1, new VariantFileMetadata("1", "1"));
    ParallelTaskRunner.Task<Variant, VcfSliceProtos.VcfSlice> task = new VariantHbaseTransformTask(helper);

    ParallelTaskRunner.Config config = ParallelTaskRunner.Config.builder()
            .setNumTasks(1)
            .setBatchSize(10)
            .setAbortOnFail(true)
            .setSorted(false)
            .build();

    return new ParallelTaskRunner<>(reader, () -> task, collector, config);
}
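// Sketch of a test driving the helper above. InMemorySliceWriter is a
// hypothetical DataWriter<VcfSliceProtos.VcfSlice> that collects slices in a
// list; run() propagates an ExecutionException if any stage fails, which the
// enclosing test method is assumed to declare.
InMemorySliceWriter collector = new InMemorySliceWriter();
ParallelTaskRunner<Variant, VcfSliceProtos.VcfSlice> runner = createParallelRunner(1000, collector);
runner.run();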