/**
 * Runs the annotation benchmark: every {@code *.vep} file found in {@code input}
 * is read, annotated by the benchmark task list, and the VEP-vs-CellBase diffs
 * are written through a {@link BenchmarkDataWriter} to {@code output}.
 * Errors are logged rather than propagated.
 */
private void runBenchmark() {
    // try-with-resources: DirectoryStream must be closed, otherwise the
    // directory handle leaks (the original never closed it).
    try (DirectoryStream<Path> stream = Files.newDirectoryStream(input,
            entry -> entry.getFileName().toString().endsWith(".vep"))) {
        FastaIndexManager fastaIndexManager = getFastaIndexManger();
        DataWriter<Pair<VariantAnnotationDiff, VariantAnnotationDiff>> dataWriter
                = new BenchmarkDataWriter("VEP", "CellBase", output);
        ParallelTaskRunner.Config config
                = new ParallelTaskRunner.Config(numThreads, batchSize, QUEUE_CAPACITY, false);
        // The same task list is reused for every input file.
        List<ParallelTaskRunner.TaskWithException<VariantAnnotation,
                Pair<VariantAnnotationDiff, VariantAnnotationDiff>, Exception>> variantAnnotatorTaskList
                = getBenchmarkTaskList(fastaIndexManager);
        for (Path entry : stream) {
            logger.info("Processing file '{}'", entry.toString());
            // Parameterized instead of the raw DataReader the original used.
            DataReader<VariantAnnotation> dataReader
                    = new VepFormatReader(input.resolve(entry.getFileName()).toString());
            ParallelTaskRunner<VariantAnnotation, Pair<VariantAnnotationDiff, VariantAnnotationDiff>> runner
                    = new ParallelTaskRunner<>(dataReader, variantAnnotatorTaskList, dataWriter, config);
            runner.run();
        }
    } catch (Exception e) {
        // Log with the cause instead of printStackTrace() so the failure is
        // visible in the application log, not just stderr.
        logger.error("Error running benchmark", e);
    }
}
/**
 * Builds a runner from an explicit list of tasks, one per worker thread.
 *
 * @param reader Unique DataReader. If null, empty batches will be generated
 * @param tasks  Generated Tasks. Each task will be used in one thread. Will use tasks.size() as "numTasks".
 * @param writer Unique DataWriter. If null, data generated by the task will be lost.
 * @param config configuration.
 * @throws IllegalArgumentException Exception.
 */
public ParallelTaskRunner(DataReader<I> reader, List<? extends org.opencb.commons.run.Task<I, O>> tasks,
                          DataWriter<O> writer, Config config) {
    this.reader = reader;
    this.writer = writer;
    this.config = config;
    // Defensive copy: later mutations of the caller's list must not affect us.
    this.tasks = new ArrayList<>(tasks);
    check();
}
init(); doSubmit(new TaskRunnable(task)); doSubmit(new WriterRunnable(writer)); interrupted = readLoop(); //Use the main thread for reading logger.info("read: timeReading = " + prettyTime(timeReading) + "s"); logger.info("read: timeBlockedAtPutRead = " + prettyTime(timeBlockedAtPutRead) + "s"); logger.info("task; timeBlockedAtTakeRead = " + prettyTime(timeBlockedAtTakeRead) + "s(total)" + " ~" + prettyTime(timeBlockedAtTakeRead / config.numTasks) + "s/thread"); logger.info("task; timeTaskApply = " + prettyTime(timeTaskApply) + "s(total)" + " ~" + prettyTime(timeTaskApply / config.numTasks) + "s/thread"); logger.info("task; timeBlockedAtPutWrite = " + prettyTime(timeBlockedAtPutWrite) + "s(total)" + " ~" + prettyTime(timeBlockedAtPutWrite / config.numTasks) + "s/thread"); logger.info("write: timeBlockedWatingDataToWrite = " + prettyTime(timeBlockedAtTakeWrite) + "s"); logger.info("write: timeWriting = " + prettyTime(timeWriting) + "s"); logger.info("total: = " + prettyTime(System.nanoTime() - start) + "s");
Batch<I> batch; batch = readBatch(); CompletableFuture<Batch<O>> completableFuture = new CompletableFuture<>(); while (!writeBlockingQueueFuture.offer(completableFuture, TIMEOUT_CHECK, TimeUnit.SECONDS)) { if (isAbortPending()) { break; if (!isJobsRunning()) { throw new IllegalStateException(String.format("No runners but queue with %s items!!!", readBlockingQueue.size())); if (isAbortPending()) { batch = readBatch();
/**
 * Runs the ParallelTaskRunner with no practical time limit.
 *
 * @throws ExecutionException if the execution fails or is interrupted; when
 *         interrupted, the first recorded interruption (if any) is the cause.
 */
public void run() throws ExecutionException {
    try {
        run(Long.MAX_VALUE, TimeUnit.DAYS);
    } catch (InterruptedException e) {
        // Restore the interrupt flag so callers up the stack can observe it.
        Thread.currentThread().interrupt();
        // Guard against an empty interruptions list, which would have thrown
        // IndexOutOfBoundsException here; fall back to the caught exception.
        throw new ExecutionException("Error while running ParallelTaskRunner. Found "
                + interruptions.size() + " interruptions.",
                interruptions.isEmpty() ? e : interruptions.get(0));
    }
}
/**
 * Builds a ParallelTaskRunner that loads variant annotations by writing them
 * straight to the database: a fresh DB writer is generated per worker thread,
 * and no output DataWriter is used.
 */
protected ParallelTaskRunner<VariantAnnotation, ?> buildLoadAnnotationParallelTaskRunner(
        DataReader<VariantAnnotation> reader, ParallelTaskRunner.Config config,
        ProgressLogger progressLogger, ObjectMap params) {
    return new ParallelTaskRunner<>(
            reader,
            () -> {
                // Each thread gets its own writer, built from a fresh copy of the params.
                return newVariantAnnotationDBWriter(dbAdaptor, new QueryOptions(params))
                        .setProgressLogger(progressLogger);
            },
            null,
            config);
}
/**
 * Runs the transform pipeline repeatedly and checks the total number of
 * produced slices, printing the elapsed wall-clock time.
 */
@Test
public void testApplySpeed() throws Exception {
    int size = 1000;
    final List<VcfSliceProtos.VcfSlice> lst = new ArrayList<>();
    // Collector writer that simply accumulates every produced slice in memory.
    DataWriter<VcfSliceProtos.VcfSlice> collector = new DataWriter<VcfSliceProtos.VcfSlice>() {
        @Override
        public boolean write(List<VcfSliceProtos.VcfSlice> batch) {
            return lst.addAll(batch);
        }

        @Override
        public boolean write(VcfSliceProtos.VcfSlice elem) {
            return lst.add(elem);
        }
    };
    long curr = System.currentTimeMillis();
    // Named constant instead of the duplicated magic number 10.
    int runs = 10;
    for (int i = 0; i < runs; i++) {
        ParallelTaskRunner<Variant, VcfSliceProtos.VcfSlice> parallelRunner = createParallelRunner(size, collector);
        parallelRunner.run();
    }
    // Each run is expected to contribute exactly 2 slices. Plain primitive
    // comparison instead of the original's gratuitous Integer.valueOf boxing.
    assertEquals(2 * runs, lst.size());
    System.out.println(System.currentTimeMillis() - curr);
}
/**
 * Builds the ParallelTaskRunner that loads variant annotations into HBase.
 * For VIEW tables (or when index maintenance is explicitly skipped) the
 * annotations are converted to HBase puts and written with a dedicated HBase
 * writer; otherwise per-thread annotation loaders write directly to the DB.
 */
@Override
protected ParallelTaskRunner<VariantAnnotation, ?> buildLoadAnnotationParallelTaskRunner(
        DataReader<VariantAnnotation> reader, ParallelTaskRunner.Config config, ProgressLogger progressLogger,
        ObjectMap params) {
    // Early-return for the HBase-converter path; condition order kept so that
    // the params flag is only consulted when the table type check fails.
    if (VariantPhoenixHelper.DEFAULT_TABLE_TYPE == PTableType.VIEW
            || params.getBoolean(HadoopVariantStorageEngine.VARIANT_TABLE_INDEXES_SKIP, false)) {
        int currentAnnotationId = dbAdaptor.getStudyConfigurationManager().getProjectMetadata().first()
                .getAnnotation().getCurrent().getId();
        VariantAnnotationToHBaseConverter hbaseConverter = new VariantAnnotationToHBaseConverter(
                dbAdaptor.getGenomeHelper(), progressLogger, currentAnnotationId);
        VariantAnnotationHadoopDBWriter hbaseWriter = new VariantAnnotationHadoopDBWriter(
                dbAdaptor.getHBaseManager(),
                dbAdaptor.getVariantTable(),
                dbAdaptor.getGenomeHelper().getColumnFamily());
        return new ParallelTaskRunner<>(reader, hbaseConverter, hbaseWriter, config);
    }
    // Default path: one freshly-created annotation loader per worker thread.
    return new ParallelTaskRunner<>(
            reader,
            () -> dbAdaptor.newAnnotationLoader(new QueryOptions(params)).setProgressLogger(progressLogger),
            null,
            config);
}
/**
 * Imports variants from an Avro file into the variants collection using a
 * 5-task ParallelTaskRunner with a batch size of 200.
 *
 * @param inputUri            URI of the Avro variants file to import
 * @param metadata            variant metadata used by the document converter
 * @param studyConfigurations configurations of the studies being imported
 * @throws StorageEngineException if the parallel run fails
 * @throws IOException            on I/O errors
 */
@Override
public void importData(URI inputUri, VariantMetadata metadata, List<StudyConfiguration> studyConfigurations)
        throws StorageEngineException, IOException {
    Path input = Paths.get(inputUri.getPath());

    // Register each study's sample positions under both its name and its
    // numeric id, so the reader can resolve either form.
    Map<String, LinkedHashMap<String, Integer>> samplesPositions = new HashMap<>();
    for (StudyConfiguration sc : studyConfigurations) {
        LinkedHashMap<String, Integer> positions = StudyConfiguration.getSortedIndexedSamplesPosition(sc);
        samplesPositions.put(sc.getStudyName(), positions);
        samplesPositions.put(String.valueOf(sc.getStudyId()), positions);
    }

    VariantReader variantReader = new VariantAvroReader(input.toAbsolutePath().toFile(), samplesPositions);
    ProgressLogger progressLogger = new ProgressLogger("Loaded variants");
    ParallelTaskRunner.Task<Variant, Document> converterTask =
            new VariantToDocumentConverter(studyConfigurations, metadata, progressLogger);
    DataWriter<Document> writer = new MongoDBVariantDocumentDBWriter(variantsCollection);
    ParallelTaskRunner.Config config = ParallelTaskRunner.Config.builder()
            .setNumTasks(5)
            .setSorted(false)
            .setBatchSize(200)
            .build();

    ParallelTaskRunner<Variant, Document> ptr =
            new ParallelTaskRunner<>(variantReader, converterTask, writer, config);
    try {
        ptr.run();
    } catch (ExecutionException e) {
        throw new StorageEngineException("Error importing variants", e);
    }
}
/**
 * Runs the transform pipeline once and checks the number of produced slices.
 */
@Test
public void testApply() throws Exception {
    int size = 1000;
    final List<VcfSliceProtos.VcfSlice> lst = new ArrayList<>();
    // Collector writer that simply accumulates every produced slice in memory.
    DataWriter<VcfSliceProtos.VcfSlice> collector = new DataWriter<VcfSliceProtos.VcfSlice>() {
        @Override
        public boolean write(List<VcfSliceProtos.VcfSlice> batch) {
            return lst.addAll(batch);
        }

        @Override
        public boolean write(VcfSliceProtos.VcfSlice elem) {
            return lst.add(elem);
        }
    };
    ParallelTaskRunner<Variant, VcfSliceProtos.VcfSlice> parallelRunner = createParallelRunner(size, collector);
    parallelRunner.run();
    // Exactly 2 slices expected. Plain primitive comparison instead of the
    // original's gratuitous Integer.valueOf boxing.
    assertEquals(2, lst.size());
}
/**
 * Builds a single-task ParallelTaskRunner that transforms {@code size}
 * variants into VcfSlice protobufs and sends them to the given collector.
 *
 * @param size      number of variants the test reader will produce
 * @param collector writer that receives the generated slices
 * @return a configured, ready-to-run ParallelTaskRunner
 * @throws Exception if the reader or helper cannot be created
 */
public ParallelTaskRunner<Variant, VcfSliceProtos.VcfSlice> createParallelRunner(
        int size, DataWriter<VcfSliceProtos.VcfSlice> collector) throws Exception {
    VcfVariantReader reader = VcfVariantReaderTest.createReader(size);
    ArchiveTableHelper helper = new ArchiveTableHelper(new Configuration(), 1, new VariantFileMetadata("1", "1"));
    ParallelTaskRunner.Task<Variant, VcfSliceProtos.VcfSlice> transformTask = new VariantHbaseTransformTask(helper);
    // Single task, small batches, abort on the first failure, unsorted output.
    ParallelTaskRunner.Config config = ParallelTaskRunner.Config.builder()
            .setNumTasks(1)
            .setBatchSize(10)
            .setAbortOnFail(true)
            .setSorted(false)
            .build();
    return new ParallelTaskRunner<>(reader, () -> transformTask, collector, config);
}
/** * @param reader Unique DataReader. If null, empty batches will be generated * @param task Task to be used. Will be used the same instance in all threads * @param writer Unique DataWriter. If null, data generated by the task will be lost. * @param config configuration. * @throws IllegalArgumentException Exception. */ public ParallelTaskRunner(DataReader<I> reader, org.opencb.commons.run.Task<I, O> task, DataWriter<O> writer, Config config) { this.config = config; this.reader = reader; this.writer = writer; this.tasks = new ArrayList<>(config.numTasks); for (int i = 0; i < config.numTasks; i++) { tasks.add(task); } check(); }
new ParallelTaskRunner<>(dataReader, variantAnnotatorTaskList, dataWriter, config); runner.run(); + VARIATION_ANNOTATION_FILE_PREFIX + chromosome + ".json.gz"); ParallelTaskRunner<Variant, Variant> runner = new ParallelTaskRunner<Variant, Variant>(dataReader, variantAnnotatorTaskList, dataWriter, config); runner.run();
/**
 * Loads variant annotations from a specified file into the selected Variant DataBase.
 *
 * @param uri URI of the annotation file
 * @param params Specific params.
 * @throws IOException IOException thrown
 * @throws StorageEngineException if there is a problem creating or running the {@link ParallelTaskRunner}
 */
public void loadVariantAnnotation(URI uri, ObjectMap params) throws IOException, StorageEngineException {
    final int batchSize = params.getInt(DefaultVariantAnnotationManager.BATCH_SIZE, 100);
    final int numConsumers = params.getInt(DefaultVariantAnnotationManager.NUM_WRITERS, 6);
    ParallelTaskRunner.Config config = ParallelTaskRunner.Config.builder()
            .setNumTasks(numConsumers)
            .setBatchSize(batchSize)
            .setAbortOnFail(true)
            .setSorted(false)
            .build();
    // Declaration and initialization merged (the original split them).
    DataReader<VariantAnnotation> reader = newVariantAnnotationDataReader(uri);
    try {
        ProgressLogger progressLogger = new ProgressLogger("Loaded annotations: ", numAnnotationsToLoad.get());
        ParallelTaskRunner<VariantAnnotation, ?> ptr =
                buildLoadAnnotationParallelTaskRunner(reader, config, progressLogger, params);
        ptr.run();
    } catch (ExecutionException e) {
        throw new StorageEngineException("Error loading variant annotation", e);
    }
}
/** * @param reader Unique DataReader. If null, empty batches will be generated. * @param taskSupplier TaskGenerator. Will generate a new task for each thread. * @param writer Unique DataWriter. If null, data generated by the task will be lost. * @param config configuration. * @throws IllegalArgumentException Exception. */ public ParallelTaskRunner(DataReader<I> reader, Supplier<? extends org.opencb.commons.run.Task<I, O>> taskSupplier, DataWriter<O> writer, Config config) { this.config = config; this.reader = reader; this.writer = writer; this.tasks = new ArrayList<>(config.numTasks); for (int i = 0; i < config.numTasks; i++) { tasks.add(taskSupplier.get()); } check(); }
dbWriter.setProgressLogger(progressLogger); writers.add(dbWriter); ptr = new ParallelTaskRunner<>( dataReader, batch -> batch, ptr.run(); } catch (ExecutionException e) { throw new StorageEngineException("Error loading stats", e);
ParallelTaskRunner<Variant, Variant> ptr = new ParallelTaskRunner<>(variantDBReader, progressTask, variantDataWriter, config); try { ptr.run(); } catch (ExecutionException e) { throw new StorageEngineException("Error exporting variants", e);
.setSorted(false).build(); ParallelTaskRunner<Variant, VariantAnnotation> parallelTaskRunner = new ParallelTaskRunner<>(variantDataReader, annotationTask, variantAnnotationDataWriter, config); parallelTaskRunner.run(); } catch (ExecutionException e) { throw new VariantAnnotatorException("Error creating annotations", e);
ParallelTaskRunner<Document, String> runner = new ParallelTaskRunner<>(reader, tasks, writer, config); try { logger.info("Starting stats creation for cohorts {}", cohorts.keySet()); long start = System.currentTimeMillis(); runner.run(); logger.info("Finishing stats creation, time: {}ms", System.currentTimeMillis() - start); } catch (ExecutionException e) {
ParallelTaskRunner runner = new ParallelTaskRunner<>(reader, tasks, writer, config); try { logger.info("starting stats creation for cohorts {}", cohorts.keySet()); long start = System.currentTimeMillis(); runner.run(); logger.info("finishing stats creation, time: {}ms", System.currentTimeMillis() - start); } catch (ExecutionException e) {