/**
 * Resolves the non-reference archive column name for the file described by the given metadata.
 * The metadata id is parsed as an integer and forwarded to the int-based overload.
 *
 * @param fileMetadata VariantFileMetadata whose id must be parsable as an integer
 * @return Column name or Qualifier
 * @throws NumberFormatException if the metadata id is not a parsable integer
 */
public static String getNonRefColumnName(VariantFileMetadata fileMetadata) {
    final int fileId = Integer.parseInt(fileMetadata.getId());
    return getNonRefColumnName(fileId);
}
/**
 * Resolves the reference archive column name for the file described by the given metadata.
 * The metadata id is parsed as an integer and forwarded to the int-based overload.
 *
 * @param fileMetadata VariantFileMetadata whose id must be parsable as an integer
 * @return Column name or Qualifier
 * @throws NumberFormatException if the metadata id is not a parsable integer
 */
public static String getRefColumnName(VariantFileMetadata fileMetadata) {
    final int fileId = Integer.parseInt(fileMetadata.getId());
    return getRefColumnName(fileId);
}

/**
protected void setOtherFields(Variant variant, StudyEntry study, VariantFileMetadata fileMetadata, List<String> ids, float quality, String filter, String info, String format, String[] alternateAlleles, String line) { // Fields not affected by the structure of REF and ALT fields if (!ids.isEmpty()) { variant.setIds(ids); } if (quality > -1) { study.addAttribute(fileMetadata.getId(), StudyEntry.QUAL, String.valueOf(quality)); } if (!filter.isEmpty()) { study.addAttribute(fileMetadata.getId(), StudyEntry.FILTER, filter); } if (!info.isEmpty()) { parseInfo(variant, fileMetadata.getId(), study.getStudyId(), info); } study.addAttribute(fileMetadata.getId(), StudyEntry.SRC, line); }
public ArchiveTableHelper(Configuration conf, int studyId, VariantFileMetadata meta) { super(conf, studyId); this.meta.set(meta); fileId = Integer.valueOf(meta.getId()); nonRefColumn = Bytes.toBytes(getNonRefColumnName(meta)); refColumn = Bytes.toBytes(getRefColumnName(meta)); keyFactory = new ArchiveRowKeyFactory(conf); }
/**
 * Stores (or replaces) the given file metadata in {@code VARIANT_FILE_METADATAS}.
 * The entry key is {@code studyId + "_" + fileId}.
 *
 * @param studyId             study the file belongs to
 * @param variantFileMetadata metadata to store
 * @throws StorageEngineException declared by the signature; not thrown by this implementation
 */
@Override
public void updateVariantFileMetadata(String studyId, VariantFileMetadata variantFileMetadata)
        throws StorageEngineException {
    final String key = studyId + "_" + variantFileMetadata.getId();
    VARIANT_FILE_METADATAS.put(key, variantFileMetadata);
}
@Override protected void setOtherFields(Variant variant, StudyEntry studyEntry, VariantFileMetadata fileMetadata, List<String> ids, float quality, String filter, String info, String format, String[] alternateAlleles, String line) { // Fields not affected by the structure of REF and ALT fields variant.setIds(ids); if (quality > -1) { studyEntry.addAttribute(fileMetadata.getId(), StudyEntry.QUAL, String.valueOf(quality)); } if (!filter.isEmpty()) { studyEntry.addAttribute(fileMetadata.getId(), StudyEntry.FILTER, filter); } Map<String, String> infoMap = getInfoMap(info); studyEntry.setFormatAsString(format); studyEntry.addAttribute(fileMetadata.getId(), StudyEntry.SRC, line); studyEntry.addAttributes(fileMetadata.getId(), infoMap); }
public ArchiveTableHelper(GenomeHelper helper, int studyId, VariantFileMetadata meta) { super(helper, studyId); this.meta.set(meta); fileId = Integer.valueOf(meta.getId()); nonRefColumn = Bytes.toBytes(getNonRefColumnName(meta)); refColumn = Bytes.toBytes(getRefColumnName(meta)); keyFactory = new ArchiveRowKeyFactory(helper.getConf()); }
/**
 * Converts a file's metadata by delegating to the (file id, sample ids) overload.
 *
 * @param fileMetadata metadata providing the file id and the sample ids
 * @return whatever the delegated overload produces for that file
 */
@Override
public List<CS> convert(VariantFileMetadata fileMetadata) {
    final String fileId = fileMetadata.getId();
    return convert(fileId, fileMetadata.getSampleIds());
}
/**
 * Converts the metadata with the parent converter and then sets the document's {@code _id}
 * to the value built by {@code buildId} from the study id and the file id.
 *
 * @param studyId study the file belongs to
 * @param object  file metadata to convert
 * @return the converted document, carrying the {@code _id} field
 */
public Document convertToStorageType(String studyId, VariantFileMetadata object) {
    final Document storageDocument = super.convertToStorageType(object);
    storageDocument.append("_id", buildId(studyId, object.getId()));
    return storageDocument;
}
/**
 * Dumps every entry of {@code VARIANT_FILE_METADATAS} as a pretty-printed JSON file under the given directory.
 * Each call increments {@code NUM_PRINTS}; file names are
 * {@code variant_file_metadata_<counter>_<fileId>.json}. The prefix and the number of stored
 * entries are printed to standard output.
 *
 * @param path directory against which the output file names are resolved
 * @throws UncheckedIOException if any file cannot be written
 */
public static void writeAll(Path path) {
    ObjectMapper jsonMapper = new ObjectMapper(new JsonFactory())
            .configure(MapperFeature.REQUIRE_SETTERS_FOR_GETTERS, true);

    String prefix = "variant_file_metadata_" + NUM_PRINTS.incrementAndGet() + "_";
    System.out.println("prefix = " + prefix);
    System.out.println("VARIANT_FILE_METADATAS = " + VARIANT_FILE_METADATAS.size());

    for (VariantFileMetadata fileMetadata : VARIANT_FILE_METADATAS.values()) {
        // One JSON file per stored metadata entry.
        Path target = path.resolve(prefix + fileMetadata.getId() + ".json");
        try (OutputStream out = new FileOutputStream(target.toFile())) {
            jsonMapper.writerWithDefaultPrettyPrinter().writeValue(out, fileMetadata);
        } catch (IOException e) {
            throw new UncheckedIOException(e);
        }
    }
}
/**
 * Creates a calculator of VariantSetStats for a single file.
 * The file id, sample ids and header are taken from the metadata, and a fresh
 * {@code VariantSetStats} instance is created and attached to the given metadata.
 *
 * @param studyId      StudyId
 * @param fileMetadata VariantFileMetadata; its stats are replaced by the new instance
 */
public VariantSetStatsCalculator(String studyId, VariantFileMetadata fileMetadata) {
    this.studyId = studyId;
    this.metadata = fileMetadata.toVariantStudyMetadata(studyId);

    this.files = Collections.singleton(fileMetadata.getId());
    this.samples = new HashSet<>(fileMetadata.getSampleIds());
    this.header = fileMetadata.getHeader();

    // The same stats object is kept by the calculator and registered on the metadata.
    this.stats = new VariantSetStats();
    fileMetadata.setStats(this.stats);
}
/**
 * Converts each file metadata into a variant set built by {@code factory}.
 * For every file, one metadata item is created per complex header line (its generic fields
 * are split on commas into lists) followed by one per simple header line. The resulting
 * variant sets are de-duplicated while keeping first-seen order.
 *
 * @param variantFileMetadata file metadata objects to convert
 * @return the distinct variant sets, in encounter order
 */
@Override
public List<VS> apply(List<VariantFileMetadata> variantFileMetadata) {
    Set<VS> uniqueVariantSets = new LinkedHashSet<>();

    for (VariantFileMetadata file : variantFileMetadata) {
        List<Object> setMetadata = new ArrayList<>();

        // Complex header lines: key plus id/type/number/description and comma-split generic fields.
        for (VariantFileHeaderComplexLine complexLine : file.getHeader().getComplexLines()) {
            Map<String, List<String>> info = complexLine.getGenericFields().entrySet().stream()
                    .collect(Collectors.toMap(
                            Map.Entry::getKey,
                            field -> Arrays.asList(field.getValue().split(","))));
            setMetadata.add(factory.newVariantSetMetadata(
                    complexLine.getKey(), null, complexLine.getId(), complexLine.getType(),
                    complexLine.getNumber(), complexLine.getDescription(), info));
        }

        // Simple header lines: only key and value are available.
        file.getHeader().getSimpleLines().forEach(simpleLine ->
                setMetadata.add(factory.newVariantSetMetadata(
                        simpleLine.getKey(), simpleLine.getValue(), null, null, null, null,
                        Collections.emptyMap())));

        @SuppressWarnings("unchecked")
        VS variantSet = (VS) factory.newVariantSet(file.getId(), file.getPath(), "", "", (List) setMetadata);
        uniqueVariantSets.add(variantSet);
    }

    return new ArrayList<>(uniqueVariantSets);
}
/**
 * Writes the given file metadata into the metadata table.
 * The row key is built from the numeric study id and file id. The value column holds the
 * string form of {@code metadata.getImpl()} and the type column marks the row as
 * {@code VARIANT_FILE_METADATA}.
 *
 * @param studyId  study identifier; must be parsable as an integer
 * @param metadata file metadata to store; its id must be parsable as an integer
 * @throws IOException           if the table cannot be prepared or the write fails
 * @throws NullPointerException  if {@code metadata} is null
 * @throws NumberFormatException if the study id or the file id is not a parsable integer
 */
private void update(String studyId, VariantFileMetadata metadata) throws IOException {
    Objects.requireNonNull(metadata);
    ensureTableExists();

    Integer fileId = Integer.valueOf(metadata.getId());
    checkFileId(fileId);

    // Encode with an explicit charset: the no-arg getBytes() uses the platform default,
    // which would make the stored bytes depend on the JVM/host writing them.
    byte[] value = metadata.getImpl().toString().getBytes(java.nio.charset.StandardCharsets.UTF_8);

    Put put = new Put(getVariantFileMetadataRowKey(Integer.valueOf(studyId), fileId));
    put.addColumn(this.family, getValueColumn(), value);
    put.addColumn(this.family, getTypeColumn(), Type.VARIANT_FILE_METADATA.bytes());
    hBaseManager.act(tableName, table -> {
        table.put(put);
    });
}
/**
 * Asserts that the number of variants obtained through {@code getVariants} for the file equals the
 * number expected from the file's stats, and returns those variants.
 * The expected number is the sum of the per-type counts whose type is contained in {@code VARIANT_TYPES}.
 *
 * @param studyConfiguration study configuration forwarded to {@code getVariants}
 * @param dbAdaptor          adaptor forwarded to {@code getVariants}
 * @param fileMetadata       metadata providing the file id and the variant type counts
 * @return the variants returned by {@code getVariants} for the file
 */
public Set<String> checkArchiveTableLoadedVariants(StudyConfiguration studyConfiguration,
                                                   VariantHadoopDBAdaptor dbAdaptor,
                                                   VariantFileMetadata fileMetadata) {
    final int fileId = Integer.parseInt(fileMetadata.getId());
    final Set<String> loadedVariants = getVariants(dbAdaptor, studyConfiguration, fileId);

    // Only the variant types listed in VARIANT_TYPES count towards the expected total.
    final int expected = fileMetadata.getStats().getVariantTypeCounts().entrySet().stream()
            .filter(typeCount -> VARIANT_TYPES.contains(VariantType.valueOf(typeCount.getKey())))
            .mapToInt(Map.Entry::getValue)
            .sum();

    assertEquals(expected, loadedVariants.size());
    return loadedVariants;
}
/**
 * Upserts the given file metadata into the collection named {@code collectionName}.
 * The metadata is converted to a document and written with replace + upsert options,
 * matching on the document's {@code _id}.
 *
 * @param studyId  study the file belongs to
 * @param metadata file metadata to store; its id must be a positive integer
 * @throws IllegalArgumentException if the file id is zero or negative
 *                                  (a non-numeric id fails with NumberFormatException)
 */
@Override
public void updateVariantFileMetadata(String studyId, VariantFileMetadata metadata) {
    MongoDBCollection collection = db.getCollection(collectionName);

    final int fileId = Integer.parseInt(metadata.getId());
    if (fileId < 1) {
        throw new IllegalArgumentException("FileIds must be integer positive");
    }

    Document document = variantFileMetadataConverter.convertToStorageType(studyId, metadata);
    String documentId = document.getString("_id");
    document.append("_id", documentId);

    QueryOptions upsertOptions = new QueryOptions(MongoDBCollection.REPLACE, true)
            .append(MongoDBCollection.UPSERT, true);
    collection.update(Filters.eq("_id", documentId), document, upsertOptions);
}
@Override public boolean pre() { if (metadataPath != null) { Files.exists(metadataPath); try (InputStream inputStream = FileUtils.newInputStream(metadataPath)) { // Read global JSON file and copy its info into the already available VariantSource object VariantFileMetadata readMetadata = VariantReaderUtils.readVariantFileMetadataFromJson(inputStream); fileMetadata.setId(readMetadata.getId()); fileMetadata.setPath(readMetadata.getPath()); fileMetadata.setHeader(readMetadata.getHeader()); fileMetadata.setSamplesPosition(readMetadata.getSamplesPosition()); fileMetadata.setStats(readMetadata.getStats()); } catch (IOException ex) { throw new UncheckedIOException(ex); } } if (fileMetadata != null) { Map<String, Integer> samplesPosition = fileMetadata.getSamplesPosition(); this.samplesPosition = new LinkedHashMap<>(samplesPosition.size()); String[] samples = new String[samplesPosition.size()]; for (Map.Entry<String, Integer> entry : samplesPosition.entrySet()) { samples[entry.getValue()] = entry.getKey(); } for (int i = 0; i < samples.length; i++) { this.samplesPosition.put(samples[i], i); } } return true; }
/**
 * Builds the VariantMetadata for the given studies and replaces the per-file entries with the
 * stored file metadata when available.
 * For each study in the converted metadata, the numeric file ids are looked up in the matching
 * StudyConfiguration (first by file id, then by file path). If any files are listed, the stored
 * file metadata is queried by study id and file ids, and every result replaces the entry with
 * the same id in the study's file list.
 *
 * @param studyConfigurations study configurations, indexed here by study name
 * @param projectMetadata     project metadata forwarded to the converter
 * @param returnedSamples     samples forwarded to the converter
 * @param returnedFiles       files forwarded to the converter
 * @param queryOptions        not used by this implementation
 * @return the assembled metadata
 * @throws StorageEngineException declared by the signature
 */
protected VariantMetadata makeVariantMetadata(List<StudyConfiguration> studyConfigurations,
                                              ProjectMetadata projectMetadata,
                                              Map<Integer, List<Integer>> returnedSamples,
                                              Map<Integer, List<Integer>> returnedFiles,
                                              QueryOptions queryOptions) throws StorageEngineException {
    VariantMetadataConverter converter = new VariantMetadataConverter();
    VariantMetadata variantMetadata =
            converter.toVariantMetadata(studyConfigurations, projectMetadata, returnedSamples, returnedFiles);

    Map<String, StudyConfiguration> configurationsByName = studyConfigurations.stream()
            .collect(Collectors.toMap(StudyConfiguration::getStudyName, Function.identity()));

    for (VariantStudyMetadata study : variantMetadata.getStudies()) {
        StudyConfiguration configuration = configurationsByName.get(study.getId());

        // Resolve each file to its numeric id: by file id first, falling back to the file path.
        List<Integer> fileIds = study.getFiles().stream()
                .map(file -> {
                    Integer byId = configuration.getFileIds().get(file.getId());
                    return byId != null ? byId : configuration.getFileIds().get(file.getPath());
                })
                .collect(Collectors.toList());
        if (fileIds.isEmpty()) {
            continue;
        }

        Query query = new Query()
                .append(VariantFileMetadataDBAdaptor.VariantFileMetadataQueryParam.STUDY_ID.key(),
                        configuration.getStudyId())
                .append(VariantFileMetadataDBAdaptor.VariantFileMetadataQueryParam.FILE_ID.key(), fileIds);

        // Swap each listed file for its stored counterpart (same id).
        scm.variantFileMetadataIterator(query, new QueryOptions()).forEachRemaining(stored -> {
            study.getFiles().removeIf(listed -> listed.getId().equals(stored.getId()));
            study.getFiles().add(stored.getImpl());
        });
    }
    return variantMetadata;
}
/**
 * Creates an iterator over the results of an archive table scan.
 * Column family and ref/non-ref column names come from the archive helper. Two converters
 * (one for non-ref, one for ref records) are built from the helper's file metadata and study id.
 * The limit is read from the query options.
 *
 * @param resultScanner scanner providing the rows to iterate
 * @param archiveHelper helper providing column names, study id and file metadata
 * @param options       query options; {@code QueryOptions.LIMIT} is read as a long
 */
public VariantHadoopArchiveDBIterator(ResultScanner resultScanner, ArchiveTableHelper archiveHelper,
                                      QueryOptions options) {
    this.resultScanner = resultScanner;
    this.iterator = resultScanner.iterator();

    this.columnFamily = archiveHelper.getColumnFamily();
    this.refColumnBytes = archiveHelper.getRefColumnName();
    this.nonRefColumnBytes = archiveHelper.getNonRefColumnName();

    final VariantFileMetadata fileMetadata = archiveHelper.getFileMetadata();
    final String studyId = String.valueOf(archiveHelper.getStudyId());
    final String fileId = fileMetadata.getId();

    // Each converter gets its own sorted samples-position map.
    this.nonRefConverter = new VcfRecordProtoToVariantConverter(
            StudyEntry.sortSamplesPositionMap(fileMetadata.getSamplesPosition()), studyId, fileId);
    this.refConverter = new VcfRecordProtoToVariantConverter(
            StudyEntry.sortSamplesPositionMap(fileMetadata.getSamplesPosition()), studyId, fileId);

    setLimit(options.getLong(QueryOptions.LIMIT));
}
public VariantTransformTask(VCFHeader header, VCFHeaderVersion version, String studyId, VariantFileMetadata fileMetadata, Path outputFileJsonFile, VariantSetStatsCalculator variantStatsTask, boolean includeSrc, boolean generateReferenceBlocks) { this.variantStatsTask = variantStatsTask; this.factory = null; this.fileMetadata = fileMetadata; this.metadata = fileMetadata.toVariantStudyMetadata(studyId); this.outputFileJsonFile = outputFileJsonFile; this.includeSrc = includeSrc; this.vcfCodec = new FullVcfCodec(); this.vcfCodec.setVCFHeader(header, version); this.converter = new VariantContextToVariantConverter(studyId, fileMetadata.getId(), fileMetadata.getSampleIds()); this.normalizer = new VariantNormalizer(true, true, false); normalizer.setGenerateReferenceBlocks(generateReferenceBlocks); }
.forEachRemaining(vs -> assertNotEquals("2", vs.getId()));