// Blocking run(): delegates to the timed run(timeout, unit) variant and rethrows any
// interruption as an ExecutionException carrying the first recorded interruption.
public void run() throws ExecutionException {
    try {
        run(Long.MAX_VALUE, TimeUnit.DAYS);
    } catch (InterruptedException e) {
        throw new ExecutionException("Error while running ParallelTaskRunner. Found "
                + interruptions.size() + " interruptions.", interruptions.get(0));
    }
}
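// A minimal, self-contained sketch of the pattern the snippets below follow: wire a
// DataReader, a Task, and a DataWriter into a ParallelTaskRunner, then call run().
// Assumptions not taken from the snippets: the interfaces live in org.opencb.commons.io /
// org.opencb.commons.run, their lifecycle methods (open/pre/post/close) have default
// implementations, and the in-memory reader, upper-casing task, and list-collecting
// writer are purely illustrative.
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.concurrent.ExecutionException;
import java.util.stream.Collectors;

import org.opencb.commons.io.DataReader;
import org.opencb.commons.io.DataWriter;
import org.opencb.commons.run.ParallelTaskRunner;

public class ParallelTaskRunnerSketch {

    public static void main(String[] args) throws ExecutionException {
        // In-memory source: read(batchSize) hands out elements until the iterator is
        // exhausted; an empty batch signals end of data to the runner.
        Iterator<String> source = Arrays.asList("a", "b", "c", "d").iterator();
        DataReader<String> reader = new DataReader<String>() {
            @Override
            public List<String> read(int batchSize) {
                List<String> batch = new ArrayList<>(batchSize);
                while (batch.size() < batchSize && source.hasNext()) {
                    batch.add(source.next());
                }
                return batch;
            }

            @Override
            public List<String> read() {
                return read(1);
            }
        };

        // Task: transform each batch (here, upper-case every element).
        ParallelTaskRunner.Task<String, String> task =
                batch -> batch.stream().map(String::toUpperCase).collect(Collectors.toList());

        // Writer: collect results into a shared list, as the tests below do with VcfSlice objects.
        List<String> results = Collections.synchronizedList(new ArrayList<>());
        DataWriter<String> writer = new DataWriter<String>() {
            @Override
            public boolean write(List<String> batch) {
                return results.addAll(batch);
            }

            @Override
            public boolean write(String elem) {
                return results.add(elem);
            }
        };

        ParallelTaskRunner.Config config = ParallelTaskRunner.Config.builder()
                .setNumTasks(2)
                .setBatchSize(2)
                .setSorted(false)
                .build();

        new ParallelTaskRunner<>(reader, task, writer, config).run();
        System.out.println(results);   // e.g. [A, B, C, D]; order not guaranteed with setSorted(false)
    }
}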
private void runBenchmark() {
    try {
        FastaIndexManager fastaIndexManager = getFastaIndexManger();
        DirectoryStream<Path> stream = Files.newDirectoryStream(input,
                entry -> entry.getFileName().toString().endsWith(".vep"));
        DataWriter<Pair<VariantAnnotationDiff, VariantAnnotationDiff>> dataWriter =
                new BenchmarkDataWriter("VEP", "CellBase", output);
        ParallelTaskRunner.Config config =
                new ParallelTaskRunner.Config(numThreads, batchSize, QUEUE_CAPACITY, false);
        List<ParallelTaskRunner.TaskWithException<VariantAnnotation,
                Pair<VariantAnnotationDiff, VariantAnnotationDiff>, Exception>> variantAnnotatorTaskList =
                getBenchmarkTaskList(fastaIndexManager);
        for (Path entry : stream) {
            logger.info("Processing file '{}'", entry.toString());
            DataReader dataReader = new VepFormatReader(input.resolve(entry.getFileName()).toString());
            ParallelTaskRunner<VariantAnnotation, Pair<VariantAnnotationDiff, VariantAnnotationDiff>> runner =
                    new ParallelTaskRunner<>(dataReader, variantAnnotatorTaskList, dataWriter, config);
            runner.run();
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}
@Override
public void importData(URI inputUri, VariantMetadata metadata, List<StudyConfiguration> studyConfigurations)
        throws StorageEngineException, IOException {
    Path input = Paths.get(inputUri.getPath());

    Map<String, LinkedHashMap<String, Integer>> samplesPositions = new HashMap<>();
    for (StudyConfiguration sc : studyConfigurations) {
        LinkedHashMap<String, Integer> map = StudyConfiguration.getSortedIndexedSamplesPosition(sc);
        samplesPositions.put(sc.getStudyName(), map);
        samplesPositions.put(String.valueOf(sc.getStudyId()), map);
    }

    VariantReader variantReader = new VariantAvroReader(input.toAbsolutePath().toFile(), samplesPositions);
    ProgressLogger progressLogger = new ProgressLogger("Loaded variants");
    ParallelTaskRunner.Task<Variant, Document> converterTask =
            new VariantToDocumentConverter(studyConfigurations, metadata, progressLogger);
    DataWriter<Document> writer = new MongoDBVariantDocumentDBWriter(variantsCollection);

    ParallelTaskRunner.Config config = ParallelTaskRunner.Config.builder()
            .setNumTasks(5)
            .setSorted(false)
            .setBatchSize(200)
            .build();
    ParallelTaskRunner<Variant, Document> ptr = new ParallelTaskRunner<>(variantReader, converterTask, writer, config);
    try {
        ptr.run();
    } catch (ExecutionException e) {
        throw new StorageEngineException("Error importing variants", e);
    }
}
@Test
public void testApplySpeed() throws Exception {
    int size = 1000;
    final List<VcfSliceProtos.VcfSlice> lst = new ArrayList<>();
    DataWriter<VcfSliceProtos.VcfSlice> collector = new DataWriter<VcfSliceProtos.VcfSlice>() {
        @Override
        public boolean write(List<VcfSliceProtos.VcfSlice> batch) {
            return lst.addAll(batch);
        }

        @Override
        public boolean write(VcfSliceProtos.VcfSlice elem) {
            return lst.add(elem);
        }
    };

    long curr = System.currentTimeMillis();
    for (int i = 0; i < 10; i++) {
        ParallelTaskRunner<Variant, VcfSliceProtos.VcfSlice> parallelRunner = createParallelRunner(size, collector);
        parallelRunner.run();
    }
    assertEquals(Integer.valueOf(2 * 10), Integer.valueOf(lst.size()));
    System.out.println(System.currentTimeMillis() - curr);
}
@Test
public void testApply() throws Exception {
    int size = 1000;
    final List<VcfSliceProtos.VcfSlice> lst = new ArrayList<>();
    DataWriter<VcfSliceProtos.VcfSlice> collector = new DataWriter<VcfSliceProtos.VcfSlice>() {
        @Override
        public boolean write(List<VcfSliceProtos.VcfSlice> batch) {
            return lst.addAll(batch);
        }

        @Override
        public boolean write(VcfSliceProtos.VcfSlice elem) {
            return lst.add(elem);
        }
    };

    ParallelTaskRunner<Variant, VcfSliceProtos.VcfSlice> parallelRunner = createParallelRunner(size, collector);
    parallelRunner.run();
    assertEquals(Integer.valueOf(2), Integer.valueOf(lst.size()));
}
// Two separate call sites: one runner consuming String records, one consuming Variant records.
ParallelTaskRunner<String, Variant> runner =
        new ParallelTaskRunner<>(dataReader, variantAnnotatorTaskList, dataWriter, config);
runner.run();

ParallelTaskRunner<Variant, Variant> runner =
        new ParallelTaskRunner<Variant, Variant>(dataReader, variantAnnotatorTaskList, dataWriter, config);
runner.run();
    ptr.run();
} catch (ExecutionException e) {
    throw new StorageEngineException("Error loading stats", e);
logger.info("Starting stats creation for cohorts {}", cohorts.keySet()); long start = System.currentTimeMillis(); runner.run(); logger.info("Finishing stats creation, time: {}ms", System.currentTimeMillis() - start); } catch (ExecutionException e) {
    ParallelTaskRunner<Variant, VariantAnnotation> parallelTaskRunner =
            new ParallelTaskRunner<>(variantDataReader, annotationTask, variantAnnotationDataWriter, config);
    parallelTaskRunner.run();
} catch (ExecutionException e) {
    throw new VariantAnnotatorException("Error creating annotations", e);
    ptr.run();
} catch (ExecutionException e) {
    throw new StorageEngineException("Error exporting variants", e);
    ptr.run();
} catch (ExecutionException e) {
    throw new StorageEngineException(String.format("Error while Transforming file %s into %s", input, outputVariantsFile), e);
logger.info("starting stats creation for cohorts {}", cohorts.keySet()); long start = System.currentTimeMillis(); runner.run(); logger.info("finishing stats creation, time: {}ms", System.currentTimeMillis() - start); } catch (ExecutionException e) {
        }, null, config);
try {
    ptr.run();
} catch (ExecutionException e) {
    throw new StorageEngineException("Error executing ParallelTaskRunner", e);
}
/**
 * Loads variant annotations from a specified file into the selected Variant DataBase.
 *
 * @param uri    URI of the annotation file
 * @param params Specific params.
 * @throws IOException            if an I/O error occurs while reading the annotation file
 * @throws StorageEngineException if there is a problem creating or running the {@link ParallelTaskRunner}
 */
public void loadVariantAnnotation(URI uri, ObjectMap params) throws IOException, StorageEngineException {
    final int batchSize = params.getInt(DefaultVariantAnnotationManager.BATCH_SIZE, 100);
    final int numConsumers = params.getInt(DefaultVariantAnnotationManager.NUM_WRITERS, 6);

    ParallelTaskRunner.Config config = ParallelTaskRunner.Config.builder()
            .setNumTasks(numConsumers)
            .setBatchSize(batchSize)
            .setAbortOnFail(true)
            .setSorted(false)
            .build();
    DataReader<VariantAnnotation> reader = newVariantAnnotationDataReader(uri);
    try {
        ProgressLogger progressLogger = new ProgressLogger("Loaded annotations: ", numAnnotationsToLoad.get());
        ParallelTaskRunner<VariantAnnotation, ?> ptr =
                buildLoadAnnotationParallelTaskRunner(reader, config, progressLogger, params);
        ptr.run();
    } catch (ExecutionException e) {
        throw new StorageEngineException("Error loading variant annotation", e);
    }
}
private void stageVariants(StudyConfiguration study, int fileId, List<Variant> variants) throws Exception {
    String archiveTableName = engine.getArchiveTableName(study.getStudyId());
    ArchiveTableHelper.createArchiveTableIfNeeded(dbAdaptor.getGenomeHelper(), archiveTableName);

    // Create empty VariantFileMetadata
    VariantFileMetadata fileMetadata = new VariantFileMetadata(String.valueOf(fileId), String.valueOf(fileId));
    fileMetadata.setSampleIds(variants.get(0).getStudies().get(0).getOrderedSamplesName());
    dbAdaptor.getStudyConfigurationManager().updateVariantFileMetadata(String.valueOf(study.getStudyId()), fileMetadata);

    // Create dummy reader
    VariantSliceReader reader = getVariantSliceReader(variants, study.getStudyId(), fileId);

    // Task supplier
    Supplier<ParallelTaskRunner.Task<ImmutablePair<Long, List<Variant>>, VcfSliceProtos.VcfSlice>> taskSupplier = () -> {
        VariantToVcfSliceConverter converter = new VariantToVcfSliceConverter();
        return list -> {
            System.out.println("list.size() = " + list.size());
            List<VcfSliceProtos.VcfSlice> vcfSlice = new ArrayList<>(list.size());
            for (ImmutablePair<Long, List<Variant>> pair : list) {
                vcfSlice.add(converter.convert(pair.getRight(), pair.getLeft().intValue()));
            }
            return vcfSlice;
        };
    };

    // Writer
    VariantHBaseArchiveDataWriter writer = new VariantHBaseArchiveDataWriter(
            dbAdaptor.getArchiveHelper(study.getStudyId(), fileId), archiveTableName, dbAdaptor.getHBaseManager());

    ParallelTaskRunner.Config config = ParallelTaskRunner.Config.builder().setNumTasks(1).build();
    ParallelTaskRunner<ImmutablePair<Long, List<Variant>>, VcfSliceProtos.VcfSlice> ptr =
            new ParallelTaskRunner<>(reader, taskSupplier, writer, config);

    // Execute stage
    System.out.println("Stage start!");
    ptr.run();
    System.out.println("Stage finished!");
}
try {
    Runtime.getRuntime().addShutdownHook(hook);
    ptr.run();
    stageSuccess(fileMetadata);
} finally {
try {
    Runtime.getRuntime().addShutdownHook(hook);
    ptr.run();
    getStudyConfigurationManager().atomicSetStatus(studyId, BatchFileOperation.Status.DONE, DIRECT_LOAD.key(), fileIds);
} finally {
        new ParallelTaskRunner<>(sliceReader, task, null, config);
try {
    ptr.run();
} catch (ExecutionException e) {
    throw new StorageEngineException("Error loading file " + input, e);
    ptrMerge.run();
} catch (ExecutionException e) {
    logger.info("Write result: {}", variantLoader.getResult());
ParallelTaskRunner<ImmutablePair<Long, List<Variant>>, VcfSliceProtos.VcfSlice> ptr =
        new ParallelTaskRunner<>(reader, task, null, config);
ptr.run();