public void addVariantFileHeader(VariantFileHeader header, List<String> formats) { Map<String, Map<String, VariantFileHeaderComplexLine>> map = new HashMap<>(); for (VariantFileHeaderComplexLine line : this.variantHeader.getComplexLines()) { Map<String, VariantFileHeaderComplexLine> keyMap = map.computeIfAbsent(line.getKey(), key -> new HashMap<>()); keyMap.put(line.getId(), line); for (VariantFileHeaderComplexLine line : header.getComplexLines()) { if (formats == null || !line.getKey().equalsIgnoreCase("format") || formats.contains(line.getId())) { Map<String, VariantFileHeaderComplexLine> keyMap = map.computeIfAbsent(line.getKey(), key -> new HashMap<>()); variantHeader.getComplexLines().add(line); Map<String, String> simpleLines = this.variantHeader.getSimpleLines() .stream() .collect(Collectors.toMap(VariantFileHeaderSimpleLine::getKey, VariantFileHeaderSimpleLine::getValue)); header.getSimpleLines().forEach((line) -> { String oldValue = simpleLines.put(line.getKey(), line.getValue()); if (oldValue != null && !oldValue.equals(line.getValue())) { this.variantHeader.setSimpleLines(simpleLines.entrySet() .stream() .map(e -> new VariantFileHeaderSimpleLine(e.getKey(), e.getValue()))
/**
 * Builds a study configuration from the given identifiers and id catalogues.
 *
 * <p>A {@code null} catalogue is treated as empty. The id maps are copied into new
 * {@code HashBiMap} instances, whereas a non-null {@code cohorts} map is stored by
 * reference. Every remaining field starts out empty, with no aggregation, a zero
 * timestamp and a variant header whose version is the empty string.
 *
 * @param studyId   numeric identifier of the study
 * @param studyName name of the study
 * @param fileIds   file name to file id map, may be {@code null}
 * @param sampleIds sample name to sample id map, may be {@code null}
 * @param cohortIds cohort name to cohort id map, may be {@code null}
 * @param cohorts   cohort id to member ids map, may be {@code null}
 */
public StudyConfiguration(int studyId, String studyName, Map<String, Integer> fileIds, Map<String, Integer> sampleIds,
                          Map<String, Integer> cohortIds, Map<Integer, Set<Integer>> cohorts) {
    // Study identity.
    this.studyId = studyId;
    this.studyName = studyName;

    // Caller-supplied catalogues; null means "nothing registered yet".
    this.fileIds = HashBiMap.create(fileIds != null ? fileIds : Collections.emptyMap());
    this.filePaths = HashBiMap.create();
    this.sampleIds = HashBiMap.create(sampleIds != null ? sampleIds : Collections.emptyMap());
    this.cohortIds = HashBiMap.create(cohortIds != null ? cohortIds : Collections.emptyMap());
    this.cohorts = cohorts != null ? cohorts : new HashMap<>();

    // Bookkeeping collections always start empty.
    this.indexedFiles = new LinkedHashSet<>();
    this.headers = new HashMap<>();
    this.samplesInFiles = new HashMap<>();
    this.calculatedStats = new LinkedHashSet<>();
    this.invalidStats = new LinkedHashSet<>();
    this.searchIndexedSampleSets = new HashMap<>();
    this.searchIndexedSampleSetsStatus = new HashMap<>();
    this.batches = new ArrayList<>();

    // Scalar defaults and the initial (version-less) header.
    this.aggregation = Aggregation.NONE;
    this.timeStamp = 0L;
    this.variantHeader = VariantFileHeader.newBuilder().setVersion("").build();
    this.attributes = new ObjectMap();
}
/**
 * Converts each file metadata entry into a variant set produced by {@code factory}.
 *
 * <p>For every file, one metadata object is created per complex header line (its generic
 * fields are split on commas to form the {@code info} map) followed by one per simple
 * header line. The resulting variant sets are de-duplicated through a
 * {@link LinkedHashSet}, so the returned list keeps first-seen order.
 *
 * @param variantFileMetadata the file metadata entries to convert
 * @return a new list with the distinct variant sets, in encounter order
 */
@Override
public List<VS> apply(List<VariantFileMetadata> variantFileMetadata) {
    Set<VS> distinctVariantSets = new LinkedHashSet<>();

    for (VariantFileMetadata file : variantFileMetadata) {
        List<Object> setMetadata = new ArrayList<>();

        // Complex lines: carry id/type/number/description plus the comma-split generic fields.
        for (VariantFileHeaderComplexLine complexLine : file.getHeader().getComplexLines()) {
            Map<String, List<String>> info = complexLine.getGenericFields()
                    .entrySet()
                    .stream()
                    .collect(Collectors.toMap(
                            Map.Entry::getKey,
                            field -> Arrays.asList(field.getValue().split(","))));
            setMetadata.add(factory.newVariantSetMetadata(
                    complexLine.getKey(),
                    null,
                    complexLine.getId(),
                    complexLine.getType(),
                    complexLine.getNumber(),
                    complexLine.getDescription(),
                    info));
        }

        // Simple lines: only key and value are known.
        file.getHeader().getSimpleLines().forEach(simpleLine -> {
            setMetadata.add(factory.newVariantSetMetadata(
                    simpleLine.getKey(), simpleLine.getValue(), null, null, null, null, Collections.emptyMap()));
        });

        @SuppressWarnings("unchecked")
        VS converted = (VS) factory.newVariantSet(file.getId(), file.getPath(), "", "", (List) setMetadata);
        distinctVariantSets.add(converted);
    }

    return new ArrayList<>(distinctVariantSets);
}
List<VariantFileHeaderSimpleLine> headerSimpleLines = new ArrayList<>(studyHeader.getSimpleLines().size()); studyHeader.getSimpleLines().forEach(line -> { if (!StudyConfiguration.UNKNOWN_HEADER_ATTRIBUTE.equals(line.getValue())) { headerSimpleLines.add(line); VariantFileHeader aggregatedHeader = new VariantFileHeader(studyHeader.getVersion(), studyHeader.getComplexLines(), headerSimpleLines);
VariantFileHeader variantFileHeader = new VariantFileHeader(vcfHeader.getFileFormat(), new LinkedList<>(), new LinkedList<>()); vcfHeader.getMeta().forEach((key, values) -> { for (Object value : values) { if (value instanceof String) { variantFileHeader.getSimpleLines().add(new VariantFileHeaderSimpleLine(key, ((String) value))); } else if (value instanceof Map) { Map<String, String> map = (Map<String, String>) value; variantFileHeader.getComplexLines().add(new VariantFileHeaderComplexLine(key, takeFromMap(map, "ID"), takeFromMap(map, "Description"),
/**
 * Collects the complex header lines whose key matches {@code key}, ignoring case,
 * indexed by their id.
 *
 * <p>No merge function is supplied to the collector, so two matching lines sharing
 * the same id make {@code Collectors.toMap} throw an {@link IllegalStateException}.
 *
 * @param key header line key to select (compared case-insensitively)
 * @return map from line id to the matching header line
 */
public Map<String, VariantFileHeaderComplexLine> getVariantHeaderLines(String key) {
    return variantHeader.getComplexLines().stream()
            .filter(headerLine -> headerLine.getKey().equalsIgnoreCase(key))
            .collect(Collectors.toMap(
                    headerLine -> headerLine.getId(),
                    headerLine -> headerLine));
}
List<VariantFileHeaderSimpleLine> simpleLines = new ArrayList<>(); VariantFileHeader avroHeader = new VariantFileHeader(); for (VCFHeaderLine line : header.getMetaDataInInputOrder()) { if (line.getKey().equalsIgnoreCase("fileFormat")) { avroHeader.setVersion(line.getValue()); continue; avroHeader.setComplexLines(complexLines); avroHeader.setSimpleLines(simpleLines);
/**
 * Creates a {@code VariantFileHeader} from this builder's state.
 *
 * <p>Each field takes the builder's value when its "set" flag is raised and the
 * field's default value otherwise.
 *
 * @return the newly built record
 * @throws org.apache.avro.AvroRuntimeException wrapping any exception raised while building
 */
@Override
public VariantFileHeader build() {
    try {
        VariantFileHeader record = new VariantFileHeader();

        // Field 0: version
        if (fieldSetFlags()[0]) {
            record.version = this.version;
        } else {
            record.version = (java.lang.String) defaultValue(fields()[0]);
        }

        // Field 1: complexLines
        if (fieldSetFlags()[1]) {
            record.complexLines = this.complexLines;
        } else {
            record.complexLines =
                    (java.util.List<org.opencb.biodata.models.variant.metadata.VariantFileHeaderComplexLine>) defaultValue(fields()[1]);
        }

        // Field 2: simpleLines
        if (fieldSetFlags()[2]) {
            record.simpleLines = this.simpleLines;
        } else {
            record.simpleLines =
                    (java.util.List<org.opencb.biodata.models.variant.metadata.VariantFileHeaderSimpleLine>) defaultValue(fields()[2]);
        }

        return record;
    } catch (Exception e) {
        throw new org.apache.avro.AvroRuntimeException(e);
    }
}
}
/**
 * Lists the ids of every complex header line in the study's variant header whose key
 * equals {@code "INFO"}, ignoring case.
 *
 * @param studyConfiguration study configuration providing the variant header
 * @return ids of the matching header lines, in header order
 */
public static List<String> getFixedAttributes(StudyConfiguration studyConfiguration) {
    return studyConfiguration.getVariantHeader().getComplexLines().stream()
            .filter(headerLine -> headerLine.getKey().equalsIgnoreCase("INFO"))
            .map(headerLine -> headerLine.getId())
            .collect(Collectors.toList());
}
@Override public VCFHeader convert(VariantFileHeader variantHeader) { HashSet<VCFHeaderLine> meta = new HashSet<>(variantHeader.getSimpleLines().size() + variantHeader.getComplexLines().size()); variantHeader.getSimpleLines().forEach(line -> meta.add(new VCFHeaderLine(line.getKey(), line.getValue()))); for (VariantFileHeaderComplexLine line : variantHeader.getComplexLines()) { VCFHeaderLine headerLine; VCFHeaderLineCount count = getVCFHeaderLineCount(line);
/**
 * Finds the first complex header line whose key and id both match the given values,
 * ignoring case.
 *
 * @param key header line key to look for (compared case-insensitively)
 * @param id  header line id to look for (compared case-insensitively)
 * @return the first matching line, or {@code null} when none matches
 */
public VariantFileHeaderComplexLine getVariantHeaderLine(String key, String id) {
    for (VariantFileHeaderComplexLine candidate : variantHeader.getComplexLines()) {
        if (candidate.getKey().equalsIgnoreCase(key) && candidate.getId().equalsIgnoreCase(id)) {
            return candidate;
        }
    }
    return null;
}
/**
 * Overwrites this configuration with the contents of {@code other}.
 *
 * <p>Collections are copied into new container instances. The bi-maps and the two
 * search-index maps tolerate a {@code null} source and fall back to an empty container;
 * the remaining collections are copied without a null check. Batches are duplicated
 * one by one through the {@code BatchFileOperation} copy constructor. The variant header
 * is rebuilt from {@code other}'s header (or from scratch when that is {@code null}) and
 * its version is always set to the empty string.
 *
 * @param other configuration to copy from
 */
public void copy(StudyConfiguration other) {
    // Identity.
    this.studyId = other.studyId;
    this.studyName = other.studyName;

    // Name <-> id catalogues; a null source yields an empty bi-map.
    this.fileIds = HashBiMap.create(other.fileIds != null ? other.fileIds : Collections.emptyMap());
    this.filePaths = HashBiMap.create(other.filePaths != null ? other.filePaths : Collections.emptyMap());
    this.sampleIds = HashBiMap.create(other.sampleIds != null ? other.sampleIds : Collections.emptyMap());
    this.cohortIds = HashBiMap.create(other.cohortIds != null ? other.cohortIds : Collections.emptyMap());

    // Plain container copies.
    this.cohorts = new HashMap<>(other.cohorts);
    this.indexedFiles = new LinkedHashSet<>(other.indexedFiles);
    this.headers = new HashMap<>(other.headers);
    this.samplesInFiles = new HashMap<>(other.samplesInFiles);
    this.calculatedStats = new LinkedHashSet<>(other.calculatedStats);
    this.invalidStats = new LinkedHashSet<>(other.invalidStats);

    // Search-index maps may be absent in the source.
    if (other.searchIndexedSampleSets != null) {
        this.searchIndexedSampleSets = new HashMap<>(other.searchIndexedSampleSets);
    } else {
        this.searchIndexedSampleSets = new HashMap<>();
    }
    if (other.searchIndexedSampleSetsStatus != null) {
        this.searchIndexedSampleSetsStatus = new HashMap<>(other.searchIndexedSampleSetsStatus);
    } else {
        this.searchIndexedSampleSetsStatus = new HashMap<>();
    }

    // Each batch is duplicated through its copy constructor.
    this.batches = new ArrayList<>(other.batches.size());
    other.batches.forEach(batch -> this.batches.add(new BatchFileOperation(batch)));

    this.aggregation = other.aggregation;
    this.timeStamp = other.timeStamp;

    // The header version is reset to "" whichever branch is taken.
    if (other.variantHeader != null) {
        this.variantHeader = VariantFileHeader.newBuilder(other.variantHeader).setVersion("").build();
    } else {
        this.variantHeader = VariantFileHeader.newBuilder().setVersion("").build();
    }

    this.attributes = new ObjectMap(other.attributes);
}
/**
 * Registers every {@code FORMAT} and {@code INFO} complex line of the given header by
 * delegating to {@code configure(id, number, type)}.
 *
 * <p>Keys are compared case-insensitively; lines with any other key are ignored.
 *
 * @param header header whose complex lines are inspected
 */
public void configure(VariantFileHeader header) {
    for (VariantFileHeaderComplexLine headerLine : header.getComplexLines()) {
        String lineKey = headerLine.getKey();
        boolean describesField = lineKey.equalsIgnoreCase("FORMAT") || lineKey.equalsIgnoreCase("INFO");
        if (!describesField) {
            continue;
        }

        VCFHeaderLineCount count = VariantFileHeaderToVCFHeaderConverter.getVCFHeaderLineCount(headerLine);
        VCFHeaderLineType valueType = VariantFileHeaderToVCFHeaderConverter.getVCFHeaderLineType(headerLine);
        configure(headerLine.getId(), count, valueType);
    }
}
}
@Override public synchronized void post() { float meanQuality = (float) (qualSum / qualCount); stats.setMeanQuality(meanQuality); //Var = SumSq / n - mean * mean stats.setStdDevQuality((float) Math.sqrt(qualSumSq / qualCount - meanQuality * meanQuality)); stats.setTiTvRatio(transitionsCount, transversionsCount); Map<String, Integer> chrLengthMap = header.getComplexLines() .stream() .filter(line -> line.getKey().equalsIgnoreCase("contig")) .collect(Collectors.toMap(line -> Region.normalizeChromosome(line.getId()), line -> { String length = line.getGenericFields().get("length"); if (StringUtils.isNumeric(length)) { return Integer.parseInt(length); } else { return -1; } })); stats.getChromosomeStats().forEach((chr, stats) -> { Integer length = chrLengthMap.get(chr); if (length != null && length > 0) { stats.setDensity(stats.getCount() / (float) length); } }); }
@Before public void setUp() throws Exception { HadoopVariantStorageEngine variantStorageManager = getVariantStorageEngine(); clearDB(variantStorageManager.getVariantTableName()); clearDB(variantStorageManager.getArchiveTableName(STUDY_ID)); //Force HBaseConverter to fail if something goes wrong HBaseToVariantConverter.setFailOnWrongVariants(true); engine = getVariantStorageEngine(); dbAdaptor = engine.getDBAdaptor(); sc1 = new StudyConfiguration(1, "study_1"); fileId1 = createNewFile(sc1); fileId2 = createNewFile(sc1); sc1.getVariantHeader().getComplexLines() .add(new VariantFileHeaderComplexLine("INFO", "AD", "", "R", "Integer", Collections.emptyMap())); sc1.getVariantHeader().getComplexLines() .add(new VariantFileHeaderComplexLine("FORMAT", "AD", "", "R", "Integer", Collections.emptyMap())); timestamp = new AtomicLong(1L); }
stream = fileMetadata.getHeader().getComplexLines() .stream() .filter(line -> line.getKey().equals("FORMAT")) ".", VCFHeaderLineType.String.toString(), null); studyConfiguration.getVariantHeader().getComplexLines().add(line); } else { throw new StorageEngineException("Unknown FORMAT field '" + extraFormatField + '\'');