private void runBenchmark() {
    try {
        FastaIndexManager fastaIndexManager = getFastaIndexManger();
        // Only process VEP result files; try-with-resources ensures the directory stream is closed
        try (DirectoryStream<Path> stream = Files.newDirectoryStream(input,
                entry -> entry.getFileName().toString().endsWith(".vep"))) {
            DataWriter<Pair<VariantAnnotationDiff, VariantAnnotationDiff>> dataWriter =
                    new BenchmarkDataWriter("VEP", "CellBase", output);
            ParallelTaskRunner.Config config =
                    new ParallelTaskRunner.Config(numThreads, batchSize, QUEUE_CAPACITY, false);
            List<ParallelTaskRunner.TaskWithException<VariantAnnotation,
                    Pair<VariantAnnotationDiff, VariantAnnotationDiff>, Exception>> variantAnnotatorTaskList =
                    getBenchmarkTaskList(fastaIndexManager);
            for (Path entry : stream) {
                logger.info("Processing file '{}'", entry.toString());
                DataReader<VariantAnnotation> dataReader =
                        new VepFormatReader(input.resolve(entry.getFileName()).toString());
                ParallelTaskRunner<VariantAnnotation, Pair<VariantAnnotationDiff, VariantAnnotationDiff>> runner =
                        new ParallelTaskRunner<>(dataReader, variantAnnotatorTaskList, dataWriter, config);
                runner.run();
            }
        }
    } catch (Exception e) {
        logger.error("Error running benchmark", e);
    }
}
protected ParallelTaskRunner<VariantAnnotation, ?> buildLoadAnnotationParallelTaskRunner(
        DataReader<VariantAnnotation> reader, ParallelTaskRunner.Config config,
        ProgressLogger progressLogger, ObjectMap params) {
    return new ParallelTaskRunner<>(reader,
            () -> newVariantAnnotationDBWriter(dbAdaptor, new QueryOptions(params))
                    .setProgressLogger(progressLogger),
            null, config);
}
@Override
public void importData(URI inputUri, VariantMetadata metadata, List<StudyConfiguration> studyConfigurations)
        throws StorageEngineException, IOException {
    Path input = Paths.get(inputUri.getPath());

    // Index sample positions by both study name and study id
    Map<String, LinkedHashMap<String, Integer>> samplesPositions = new HashMap<>();
    for (StudyConfiguration sc : studyConfigurations) {
        LinkedHashMap<String, Integer> map = StudyConfiguration.getSortedIndexedSamplesPosition(sc);
        samplesPositions.put(sc.getStudyName(), map);
        samplesPositions.put(String.valueOf(sc.getStudyId()), map);
    }

    VariantReader variantReader = new VariantAvroReader(input.toAbsolutePath().toFile(), samplesPositions);
    ProgressLogger progressLogger = new ProgressLogger("Loaded variants");
    ParallelTaskRunner.Task<Variant, Document> converterTask =
            new VariantToDocumentConverter(studyConfigurations, metadata, progressLogger);
    DataWriter<Document> writer = new MongoDBVariantDocumentDBWriter(variantsCollection);

    ParallelTaskRunner.Config config = ParallelTaskRunner.Config.builder()
            .setNumTasks(5)
            .setBatchSize(200)
            .setSorted(false)
            .build();
    ParallelTaskRunner<Variant, Document> ptr = new ParallelTaskRunner<>(variantReader, converterTask, writer, config);
    try {
        ptr.run();
    } catch (ExecutionException e) {
        throw new StorageEngineException("Error importing variants", e);
    }
}
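// A minimal, self-contained sketch of the reader -> task(s) -> writer wiring that all
// snippets in this file share. Illustration only, not OpenCGA code: the in-memory
// reader/writer and the upper-casing task are hypothetical, and it assumes that in
// opencb-commons DataReader.read(int) and DataWriter.write(List) are the interfaces'
// single abstract methods, so lambdas can stand in for them.
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.concurrent.ExecutionException;

import org.opencb.commons.io.DataReader;
import org.opencb.commons.io.DataWriter;
import org.opencb.commons.run.ParallelTaskRunner;

public class PtrWiringSketch {
    public static void main(String[] args) throws ExecutionException {
        // Toy reader: hands out up to batchSize items per call; an empty batch signals end-of-data.
        Iterator<String> source = Arrays.asList("a", "b", "c", "d").iterator();
        DataReader<String> reader = batchSize -> {
            List<String> batch = new ArrayList<>(batchSize);
            while (batch.size() < batchSize && source.hasNext()) {
                batch.add(source.next());
            }
            return batch;
        };

        // Task: transforms one input batch into one output batch; runs on numTasks worker threads.
        ParallelTaskRunner.Task<String, String> task = batch -> {
            List<String> out = new ArrayList<>(batch.size());
            for (String s : batch) {
                out.add(s.toUpperCase());
            }
            return out;
        };

        // Toy writer: the runner's single writer thread consumes the transformed batches.
        DataWriter<String> writer = batch -> {
            batch.forEach(System.out::println);
            return true;
        };

        ParallelTaskRunner.Config config = ParallelTaskRunner.Config.builder()
                .setNumTasks(2)
                .setBatchSize(2)
                .setAbortOnFail(true)
                .setSorted(false)
                .build();
        new ParallelTaskRunner<>(reader, task, writer, config).run();
    }
}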
// dataWriter output path ends: ... + VARIATION_ANNOTATION_FILE_PREFIX + chromosome + ".json.gz");
ParallelTaskRunner<Variant, Variant> runner =
        new ParallelTaskRunner<>(dataReader, variantAnnotatorTaskList, dataWriter, config);
runner.run();
dbWriter.setProgressLogger(progressLogger);
writers.add(dbWriter);
ptr = new ParallelTaskRunner<>(
        dataReader,
        batch -> batch,   // identity task: batches pass straight through to the writers
        .setSorted(false).build();
ParallelTaskRunner<Variant, VariantAnnotation> parallelTaskRunner =
        new ParallelTaskRunner<>(variantDataReader, annotationTask, variantAnnotationDataWriter, config);
parallelTaskRunner.run();
} catch (ExecutionException e) {
ParallelTaskRunner<Document, String> runner = new ParallelTaskRunner<>(reader, tasks, writer, config);
try {
    logger.info("Starting stats creation for cohorts {}", cohorts.keySet());
ParallelTaskRunner<Variant, Variant> ptr =
        new ParallelTaskRunner<>(variantDBReader, progressTask, variantDataWriter, config);
try {
    ptr.run();
ptr = new ParallelTaskRunner<>(sliceReader, supplier, dataWriter, config);
ParallelTaskRunner<?, ?> runner = new ParallelTaskRunner<>(reader, tasks, writer, config);
try {
    logger.info("Starting stats creation for cohorts {}", cohorts.keySet());
@Override
protected ParallelTaskRunner<VariantAnnotation, ?> buildLoadAnnotationParallelTaskRunner(
        DataReader<VariantAnnotation> reader, ParallelTaskRunner.Config config,
        ProgressLogger progressLogger, ObjectMap params) {
    if (VariantPhoenixHelper.DEFAULT_TABLE_TYPE == PTableType.VIEW
            || params.getBoolean(HadoopVariantStorageEngine.VARIANT_TABLE_INDEXES_SKIP, false)) {
        int currentAnnotationId = dbAdaptor.getStudyConfigurationManager().getProjectMetadata().first()
                .getAnnotation().getCurrent().getId();
        VariantAnnotationToHBaseConverter task =
                new VariantAnnotationToHBaseConverter(dbAdaptor.getGenomeHelper(), progressLogger, currentAnnotationId);
        VariantAnnotationHadoopDBWriter writer = new VariantAnnotationHadoopDBWriter(
                dbAdaptor.getHBaseManager(),
                dbAdaptor.getVariantTable(),
                dbAdaptor.getGenomeHelper().getColumnFamily());
        return new ParallelTaskRunner<>(reader, task, writer, config);
    } else {
        return new ParallelTaskRunner<>(reader,
                () -> dbAdaptor.newAnnotationLoader(new QueryOptions(params))
                        .setProgressLogger(progressLogger),
                null, config);
    }
}
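// Why several branches pass a Supplier<Task> instead of a Task instance: the supplier
// form lets each of the numTasks worker threads build its own private task, so
// per-instance mutable state needs no synchronization. Hedged sketch with a
// hypothetical stateful task; it assumes Task's single abstract method is apply(List),
// as the lambda tasks elsewhere in this file suggest.
import java.util.List;
import java.util.function.Supplier;

import org.opencb.commons.run.ParallelTaskRunner;

// Hypothetical stateful task: not thread-safe, so it must not be shared across task threads.
class CountingTask implements ParallelTaskRunner.Task<String, String> {
    private int seen = 0; // per-instance state: safe only while the instance is unshared

    @Override
    public List<String> apply(List<String> batch) {
        seen += batch.size();
        return batch;
    }
}

class SupplierWiringSketch {
    static Supplier<ParallelTaskRunner.Task<String, String>> taskSupplier() {
        // One fresh CountingTask per worker thread, mirroring the supplier branches above.
        return CountingTask::new;
    }
}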
try {
    GffReader gffReader = new GffReader(path);
    ParallelTaskRunner<Gff, Void> ptr = new ParallelTaskRunner<>(
            new FormatReaderWrapper<>(gffReader), gffList -> {

try {
    BedReader bedReader = new BedReader(path);
    ParallelTaskRunner<Bed, Void> ptr = new ParallelTaskRunner<>(
            new FormatReaderWrapper<>(bedReader), bedList -> {

ParallelTaskRunner<Variant, Void> ptr = new ParallelTaskRunner<>(
        VariantReaderUtils.getVariantVcfReader(Paths.get(fileName), metadata), variantList -> {
ptr = new ParallelTaskRunner<>(
        dataReader,
        taskSupplier,
if (isStageParallelWrite(options)) {
    logger.info("Multi thread stage load... [{} readerThreads, {} writerThreads]", numReaders, loadThreads);
    ptr = new ParallelTaskRunner<>(variantReader, remapIdsTask.then(converterTask).then(stageLoader), null, config);
} else {
    logger.info("Multi thread stage load... [{} readerThreads, {} tasks, {} writerThreads]",
            numReaders, loadThreads, 1);
    ptr = new ParallelTaskRunner<>(variantReader, remapIdsTask.then(converterTask), stageLoader, config);
private void stageVariants(StudyConfiguration study, int fileId, List<Variant> variants) throws Exception {
    String archiveTableName = engine.getArchiveTableName(study.getStudyId());
    ArchiveTableHelper.createArchiveTableIfNeeded(dbAdaptor.getGenomeHelper(), archiveTableName);

    // Create empty VariantFileMetadata
    VariantFileMetadata fileMetadata = new VariantFileMetadata(String.valueOf(fileId), String.valueOf(fileId));
    fileMetadata.setSampleIds(variants.get(0).getStudies().get(0).getOrderedSamplesName());
    dbAdaptor.getStudyConfigurationManager().updateVariantFileMetadata(String.valueOf(study.getStudyId()), fileMetadata);

    // Create dummy reader
    VariantSliceReader reader = getVariantSliceReader(variants, study.getStudyId(), fileId);

    // Task supplier: each task thread gets its own VariantToVcfSliceConverter
    Supplier<ParallelTaskRunner.Task<ImmutablePair<Long, List<Variant>>, VcfSliceProtos.VcfSlice>> taskSupplier = () -> {
        VariantToVcfSliceConverter converter = new VariantToVcfSliceConverter();
        return list -> {
            System.out.println("list.size() = " + list.size());
            List<VcfSliceProtos.VcfSlice> vcfSlice = new ArrayList<>(list.size());
            for (ImmutablePair<Long, List<Variant>> pair : list) {
                vcfSlice.add(converter.convert(pair.getRight(), pair.getLeft().intValue()));
            }
            return vcfSlice;
        };
    };

    // Writer
    VariantHBaseArchiveDataWriter writer = new VariantHBaseArchiveDataWriter(
            dbAdaptor.getArchiveHelper(study.getStudyId(), fileId), archiveTableName, dbAdaptor.getHBaseManager());

    ParallelTaskRunner.Config config = ParallelTaskRunner.Config.builder().setNumTasks(1).build();
    ParallelTaskRunner<ImmutablePair<Long, List<Variant>>, VcfSliceProtos.VcfSlice> ptr =
            new ParallelTaskRunner<>(reader, taskSupplier, writer, config);

    // Execute stage
    System.out.println("Stage start!");
    ptr.run();
    System.out.println("Stage finished!");
}
if (isDirectLoadParallelWrite(options)) {
    logger.info("Multi thread direct load... [{} readerThreads, {} writerThreads]", numReaders, loadThreads);
    ptr = new ParallelTaskRunner<>(stageReader, variantMerger.then(loader), null, config);
} else {
    logger.info("Multi thread direct load... [{} readerThreads, {} tasks, {} writerThreads]",
            numReaders, loadThreads, 1);
    ptr = new ParallelTaskRunner<>(stageReader, variantMerger, loader, config);
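// The "parallel write" branches above differ from the others only in where the
// load/write step runs. Hedged, self-contained sketch of that difference; everything
// here is hypothetical toy code, and it assumes (a) lambdas fit DataReader/DataWriter
// as noted earlier, (b) Task.then composes tasks as the snippets above use it, and
// (c) an empty batch from the reader signals end-of-data.
import java.util.Collections;
import java.util.concurrent.ExecutionException;
import java.util.stream.Collectors;

import org.opencb.commons.io.DataReader;
import org.opencb.commons.io.DataWriter;
import org.opencb.commons.run.ParallelTaskRunner;

public class ParallelWriteSketch {
    public static void main(String[] args) throws ExecutionException {
        DataReader<String> reader = batchSize -> Collections.emptyList(); // toy reader: ends immediately
        ParallelTaskRunner.Task<String, String> merge =
                batch -> batch.stream().map(String::trim).collect(Collectors.toList());
        ParallelTaskRunner.Task<String, String> load = batch -> {
            batch.forEach(System.out::println); // the "write" side effect, on a task thread
            return batch;
        };
        DataWriter<String> loaderWriter = batch -> {
            batch.forEach(System.out::println); // same side effect, on the single writer thread
            return true;
        };
        ParallelTaskRunner.Config config = ParallelTaskRunner.Config.builder().setNumTasks(4).build();

        // "Parallel write": the load step is fused into the task chain and runs on all
        // four task threads; no writer is attached (null), as in the branch above.
        new ParallelTaskRunner<>(reader, merge.then(load), null, config).run();

        // Single-writer wiring: writes are funneled through the runner's one writer thread.
        new ParallelTaskRunner<>(reader, merge, loaderWriter, config).run();
    }
}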
public ParallelTaskRunner<Variant, VcfSliceProtos.VcfSlice> createParallelRunner(
        int size, DataWriter<VcfSliceProtos.VcfSlice> collector) throws Exception {
    VcfVariantReader reader = VcfVariantReaderTest.createReader(size);
    Configuration conf = new Configuration();
    ArchiveTableHelper helper = new ArchiveTableHelper(conf, 1, new VariantFileMetadata("1", "1"));
    ParallelTaskRunner.Task<Variant, VcfSliceProtos.VcfSlice> task = new VariantHbaseTransformTask(helper);
    ParallelTaskRunner.Config config = ParallelTaskRunner.Config.builder()
            .setNumTasks(1)
            .setBatchSize(10)
            .setAbortOnFail(true)
            .setSorted(false)
            .build();
    // Note: the supplier returns the same task instance on every call; this is only
    // safe because numTasks is 1, so a single task thread ever uses it.
    return new ParallelTaskRunner<>(reader, () -> task, collector, config);
}
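// A possible way to exercise the helper above in a test: collect slices in memory.
// Hypothetical usage, assuming a lambda fits DataWriter as noted earlier; the
// synchronizedList is defensive, since the writer runs on a single thread anyway.
@Test
public void testCreateParallelRunner() throws Exception {
    List<VcfSliceProtos.VcfSlice> slices = Collections.synchronizedList(new ArrayList<>());
    DataWriter<VcfSliceProtos.VcfSlice> collector = batch -> slices.addAll(batch);
    createParallelRunner(1000, collector).run();
    System.out.println("Collected " + slices.size() + " slices");
}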
try {
    if (isMergeParallelWrite(options)) {
        ptrMerge = new ParallelTaskRunner<>(reader, variantMerger.then(variantLoader), null, config);
    } else {
        ptrMerge = new ParallelTaskRunner<>(reader, variantMerger, variantLoader, config);
        new ParallelTaskRunner<>(sliceReader, task, null, config);
try {
    ptr.run();
        new ParallelTaskRunner<>(reader, task, null, config);
ptr.run();