/**
 * Returns the identifier reported by the wrapped {@code impl} object.
 *
 * @return the value of {@code impl.getId()}
 */
public String getId() {
    final String id = impl.getId();
    return id;
}
/**
 * Returns the path reported by the wrapped {@code impl} object.
 *
 * @return the value of {@code impl.getPath()}
 */
public String getPath() {
    final String path = impl.getPath();
    return path;
}
/**
 * Returns a read-only view of the sample IDs held by the wrapped {@code impl} object.
 *
 * @return an unmodifiable list wrapping the sample IDs, or {@code null} when the
 *         wrapped object has no sample ID list
 */
public List<String> getSampleIds() {
    final List<String> sampleIds = impl.getSampleIds();
    if (sampleIds == null) {
        return null;
    }
    // Callers must go through setSampleIds to change the list.
    return Collections.unmodifiableList(sampleIds);
}
/**
 * Creates a converter from a study metadata object, using only its first file.
 * <p>
 * Delegates to another constructor of this class with three values: the samples position map
 * built by {@code VariantFileMetadata.getSamplesPositionMap} from the first file's sample IDs,
 * the first file's ID, and the study's ID.
 * <p>
 * NOTE(review): {@code metadata.getFiles().get(0)} is read unconditionally, so this presumably
 * fails when the study has no files - confirm that callers always provide at least one.
 *
 * @param metadata study metadata whose first file supplies the sample IDs and the file ID
 */
public VcfSliceToVariantListConverter(VariantStudyMetadata metadata) { this(VariantFileMetadata.getSamplesPositionMap(metadata.getFiles().get(0).getSampleIds()), metadata.getFiles().get(0).getId(), metadata.getId()); }
/**
 * Builds the document-to-variant converter chain and the study/file ID remapping tables.
 * <p>
 * For every study configuration, the study name is mapped to its numeric study ID (as a string)
 * and every file name known to the configuration is mapped to its numeric file ID (as a string).
 * Each file of the matching study metadata whose path is already in the file remap table is then
 * also registered under its metadata ID, pointing to the same numeric ID.
 *
 * @param studies        study configurations to read the numeric IDs from
 * @param metadata       variant metadata used to look up each study's files by study name
 * @param progressLogger progress logger stored for later use
 */
VariantToDocumentConverter(List<StudyConfiguration> studies, VariantMetadata metadata, ProgressLogger progressLogger) {
    DocumentToSamplesConverter samplesConverter = new DocumentToSamplesConverter(studies);
    DocumentToStudyVariantEntryConverter studyEntryConverter =
            new DocumentToStudyVariantEntryConverter(false, samplesConverter);
    DocumentToVariantStatsConverter statsConverter = new DocumentToVariantStatsConverter(studies);
    this.variantConverter = new DocumentToVariantConverter(studyEntryConverter, statsConverter);

    this.studiesIdRemap = new HashMap<>();
    this.fileIdRemap = new HashMap<>();

    VariantMetadataManager metadataManager = new VariantMetadataManager().setVariantMetadata(metadata);
    for (StudyConfiguration sc : studies) {
        VariantStudyMetadata studyMetadata = metadataManager.getVariantStudyMetadata(sc.getStudyName());
        this.studiesIdRemap.put(sc.getStudyName(), String.valueOf(sc.getStudyId()));

        // File names known to the study configuration -> numeric file ID
        sc.getFileIds().forEach((fileName, numericId) -> this.fileIdRemap.put(fileName, String.valueOf(numericId)));

        // Register an alias under the metadata file ID when the file path is already mapped
        for (VariantFileMetadata fileMetadata : studyMetadata.getFiles()) {
            String mappedId = this.fileIdRemap.get(fileMetadata.getPath());
            if (mappedId != null) {
                this.fileIdRemap.put(fileMetadata.getId(), mappedId);
            }
        }
    }

    this.progressLogger = progressLogger;
}
/** * Add a variant file metadata (from VCF file and header) to a given variant study metadata (from study ID). * * @param filename VCF filename (as an ID) * @param vcfHeader VCF header * @param studyId Study ID */ public void addFile(String filename, VCFHeader vcfHeader, String studyId) { // sanity check if (StringUtils.isEmpty(filename)) { logger.error("VCF filename is empty or null: '{}'", filename); return; } if (vcfHeader == null) { logger.error("VCF header is missingDataset not found. Check your study ID: '{}'", studyId); return; } VCFHeaderToVariantFileHeaderConverter headerConverter = new VCFHeaderToVariantFileHeaderConverter(); VariantFileMetadata variantFileMetadata = new VariantFileMetadata(); variantFileMetadata.setId(filename); variantFileMetadata.setSampleIds(vcfHeader.getSampleNamesInOrder()); variantFileMetadata.setHeader(headerConverter.convert(vcfHeader)); addFile(variantFileMetadata, studyId); }
/**
 * Converts every study of a variant metadata object into a {@code StudyConfiguration}.
 * <p>
 * Numeric IDs are taken from one running counter that starts at 1 and is shared by studies,
 * samples, files and cohorts, in the order they are visited. Every file is marked as indexed and
 * every cohort is marked as having calculated stats.
 *
 * @param variantMetadata variant metadata to convert
 * @return one study configuration per study, in the order of {@code variantMetadata.getStudies()}
 */
public List<StudyConfiguration> toStudyConfigurations(VariantMetadata variantMetadata) {
    List<StudyConfiguration> configurations = new ArrayList<>(variantMetadata.getStudies().size());
    // Single counter: studies, samples, files and cohorts never share a number
    int nextId = 1;
    VariantMetadataManager metadataManager = new VariantMetadataManager().setVariantMetadata(variantMetadata);

    for (VariantStudyMetadata studyMetadata : variantMetadata.getStudies()) {
        int studyId = nextId++;
        StudyConfiguration sc = new StudyConfiguration(studyId, studyMetadata.getId());
        configurations.add(sc);

        // Samples
        List<Sample> studySamples = metadataManager.getSamples(studyMetadata.getId());
        for (Sample sample : studySamples) {
            sc.getSampleIds().put(sample.getId(), nextId++);
        }

        // Files: registered by path, flagged as indexed, with their samples resolved to numeric IDs
        for (VariantFileMetadata fileMetadata : studyMetadata.getFiles()) {
            int fileId = nextId++;
            sc.getIndexedFiles().add(fileId);
            sc.getFileIds().put(fileMetadata.getPath(), fileId);
            List<Integer> fileSampleIds = toSampleIds(sc, fileMetadata.getSampleIds());
            sc.getSamplesInFiles().put(fileId, new LinkedHashSet<>(fileSampleIds));
        }

        // Cohorts: registered by cohort ID and flagged as having calculated stats
        for (Cohort cohort : studyMetadata.getCohorts()) {
            int cohortId = nextId++;
            sc.getCohortIds().put(cohort.getId(), cohortId);
            sc.getCalculatedStats().add(cohortId);
            sc.getCohorts().put(cohortId, new HashSet<>(toSampleIds(sc, cohort.getSampleIds())));
        }

        sc.setVariantHeader(studyMetadata.getAggregatedHeader());
        sc.setAggregation(studyMetadata.getAggregation());
        studyMetadata.getAttributes().forEach(sc.getAttributes()::put);
    }

    return configurations;
}
/**
 * Replaces the sample IDs of the wrapped {@code impl} object and resets {@code samplesPosition}
 * to {@code null}.
 *
 * @param sampleIds new sample IDs, passed to the wrapped object unchanged
 * @return this instance, to allow call chaining
 */
public VariantFileMetadata setSampleIds(List<String> sampleIds) {
    this.impl.setSampleIds(sampleIds);
    // Any previously stored samples position no longer matches the new list
    this.samplesPosition = null;
    return this;
}
/**
 * Creates a wrapper around a new biodata {@code VariantFileMetadata} built with its
 * no-argument constructor. {@code samplesPosition} starts as {@code null}.
 */
VariantFileMetadata() {
    this.impl = new org.opencb.biodata.models.variant.metadata.VariantFileMetadata();
    this.samplesPosition = null;
}
/**
 * Returns the string representation of the wrapped {@code impl} object.
 *
 * @return the value of {@code impl.toString()}
 */
@Override
public String toString() {
    final String text = impl.toString();
    return text;
}
/**
 * Returns the file header reported by the wrapped {@code impl} object.
 *
 * @return the value of {@code impl.getHeader()}
 */
public VariantFileHeader getHeader() {
    final VariantFileHeader header = impl.getHeader();
    return header;
}
/**
 * Sets the identifier on the wrapped {@code impl} object.
 *
 * @param id new identifier, passed to the wrapped object unchanged
 * @return this instance, to allow call chaining
 */
public VariantFileMetadata setId(String id) {
    this.impl.setId(id);
    return this;
}
/**
 * Sets the file header on the wrapped {@code impl} object.
 *
 * @param header new header, passed to the wrapped object unchanged
 * @return this instance, to allow call chaining
 */
public VariantFileMetadata setHeader(VariantFileHeader header) {
    this.impl.setHeader(header);
    return this;
}
/**
 * Print to the standard output a summary of the variant metadata manager.
 * <p>
 * The summary lists the number of studies and, for each study, its ID, its files (ID and number
 * of samples) and its cohorts (ID and number of samples). A missing ({@code null}) list of
 * studies, files, cohorts or sample IDs is reported as having zero elements.
 */
public void printSummary() {
    StringBuilder res = new StringBuilder();
    res.append("Num. studies: ").append(sizeOrZero(variantMetadata.getStudies())).append("\n");

    if (variantMetadata.getStudies() != null) {
        int studyCounter = 0;
        for (VariantStudyMetadata study : variantMetadata.getStudies()) {
            studyCounter++;
            res.append("\tStudy #").append(studyCounter).append(": ").append(study.getId()).append("\n");

            res.append("\tNum. files: ").append(sizeOrZero(study.getFiles())).append("\n");
            if (study.getFiles() != null) {
                int fileCounter = 0;
                for (VariantFileMetadata file : study.getFiles()) {
                    fileCounter++;
                    res.append("\t\tFile #").append(fileCounter).append(": ").append(file.getId());
                    res.append(" (").append(sizeOrZero(file.getSampleIds())).append(" samples)\n");
                }
            }

            res.append("\tNum. cohorts: ").append(sizeOrZero(study.getCohorts())).append("\n");
            if (study.getCohorts() != null) {
                int cohortCounter = 0;
                for (Cohort cohort : study.getCohorts()) {
                    cohortCounter++;
                    res.append("\t\tCohort #").append(cohortCounter).append(": ").append(cohort.getId());
                    res.append(" (").append(sizeOrZero(cohort.getSampleIds())).append(" samples)\n");
                }
            }
        }
    }

    System.out.println(res.toString());
}

/** Returns the number of elements of a collection, treating {@code null} as empty. */
private static int sizeOrZero(java.util.Collection<?> items) {
    return items == null ? 0 : items.size();
}
/**
 * Builds the variant metadata for a set of studies and replaces each study's file entries with
 * the file metadata returned by {@code scm.variantFileMetadataIterator}.
 * <p>
 * For every study, the numeric file IDs are looked up in the study configuration, first by the
 * file's metadata ID and then by its path. The resulting IDs are used to query the stored file
 * metadata; each returned entry replaces any file with the same ID in the study metadata.
 * <p>
 * NOTE(review): the {@code queryOptions} parameter is not read by this method; the iterator is
 * always called with a new, empty {@code QueryOptions}.
 *
 * @param studyConfigurations study configurations to convert
 * @param projectMetadata     project metadata passed to the converter
 * @param returnedSamples     returned samples, passed to the converter
 * @param returnedFiles       returned files, passed to the converter
 * @param queryOptions        unused in this method
 * @return the converted variant metadata, with file entries replaced where stored metadata exists
 * @throws StorageEngineException declared by the signature
 */
protected VariantMetadata makeVariantMetadata(List<StudyConfiguration> studyConfigurations, ProjectMetadata projectMetadata,
                                              Map<Integer, List<Integer>> returnedSamples,
                                              Map<Integer, List<Integer>> returnedFiles,
                                              QueryOptions queryOptions) throws StorageEngineException {
    VariantMetadataConverter converter = new VariantMetadataConverter();
    VariantMetadata metadata = converter.toVariantMetadata(studyConfigurations, projectMetadata, returnedSamples, returnedFiles);

    Map<String, StudyConfiguration> configurationsByName = studyConfigurations.stream()
            .collect(Collectors.toMap(StudyConfiguration::getStudyName, Function.identity()));

    for (VariantStudyMetadata studyMetadata : metadata.getStudies()) {
        StudyConfiguration studyConfiguration = configurationsByName.get(studyMetadata.getId());

        // Resolve each file to its numeric ID: by metadata ID first, by path as fallback
        List<Integer> fileIds = studyMetadata.getFiles().stream()
                .map(file -> {
                    Integer byId = studyConfiguration.getFileIds().get(file.getId());
                    return byId != null ? byId : studyConfiguration.getFileIds().get(file.getPath());
                })
                .collect(Collectors.toList());
        if (fileIds == null || fileIds.isEmpty()) {
            continue;
        }

        Query query = new Query()
                .append(VariantFileMetadataDBAdaptor.VariantFileMetadataQueryParam.STUDY_ID.key(), studyConfiguration.getStudyId())
                .append(VariantFileMetadataDBAdaptor.VariantFileMetadataQueryParam.FILE_ID.key(), fileIds);

        // Swap every matching file entry for the stored file metadata
        scm.variantFileMetadataIterator(query, new QueryOptions()).forEachRemaining(stored -> {
            studyMetadata.getFiles().removeIf(existing -> existing.getId().equals(stored.getId()));
            studyMetadata.getFiles().add(stored.getImpl());
        });
    }

    return metadata;
}
@Override public boolean pre() { try { processHeader(); // Copy all the read metadata to the VariantSource object // TODO May it be that Vcf4 wasn't necessary anymore? // This Vcf4 object is not necessary anymore. Do not include it's information. // The header parser contains bugs and misses information. // Use htsjdk parser instead // source.addMetadata("fileformat", vcf4.getFileFormat()); // source.addMetadata("INFO", vcf4.getInfo().values()); // source.addMetadata("FILTER", vcf4.getFilter().values()); // source.addMetadata("FORMAT", vcf4.getFormat().values()); // for (Map.Entry<String, String> otherMeta : vcf4.getMetaInformation().entrySet()) { // source.addMetadata(otherMeta.getKey(), otherMeta.getValue()); // } metadata.getFiles().get(0).setSampleIds(vcf4.getSampleNames()); } catch (IOException | FileFormatException ex) { Logger.getLogger(VariantVcfReader.class.getName()).log(Level.SEVERE, null, ex); return false; } return true; }
/**
 * Creates a wrapper around a new biodata {@code VariantFileMetadata} built from the given
 * values and a new, empty {@code HashMap} as its last constructor argument.
 * {@code samplesPosition} starts as {@code null}.
 *
 * @param id        file identifier
 * @param path      file path
 * @param sampleIds sample IDs of the file
 * @param stats     variant set statistics
 * @param header    file header
 */
public VariantFileMetadata(String id, String path, List<String> sampleIds,
                           org.opencb.biodata.models.variant.metadata.VariantSetStats stats,
                           VariantFileHeader header) {
    this.impl = new org.opencb.biodata.models.variant.metadata.VariantFileMetadata(
            id, path, sampleIds, stats, header, new HashMap<>());
    this.samplesPosition = null;
}
/**
 * Writes a variant file metadata object to the metadata table.
 * <p>
 * The row key is derived from the numeric study ID and the numeric file ID (the metadata ID parsed
 * as an integer). The value column holds the string form of the wrapped metadata object, encoded
 * as UTF-8; the type column marks the row as {@code VARIANT_FILE_METADATA}.
 *
 * @param studyId  study ID; must be parseable as an integer
 * @param metadata file metadata to store; its ID must be parseable as an integer
 * @throws IOException           declared by the signature for the table operations
 * @throws NullPointerException  if {@code metadata} is null
 * @throws NumberFormatException if {@code studyId} or the metadata ID is not an integer
 */
private void update(String studyId, VariantFileMetadata metadata) throws IOException {
    Objects.requireNonNull(metadata);
    ensureTableExists();
    Integer fileId = Integer.valueOf(metadata.getId());
    checkFileId(fileId);

    Put put = new Put(getVariantFileMetadataRowKey(Integer.valueOf(studyId), fileId));
    // Encode explicitly: getBytes() without a charset uses the platform default, which would make
    // the stored bytes depend on the JVM's locale settings.
    byte[] value = metadata.getImpl().toString().getBytes(java.nio.charset.StandardCharsets.UTF_8);
    put.addColumn(this.family, getValueColumn(), value);
    put.addColumn(this.family, getTypeColumn(), Type.VARIANT_FILE_METADATA.bytes());
    hBaseManager.act(tableName, table -> {
        table.put(put);
    });
}
if (header == null) { if (variantStudyMetadata.getFiles() != null && variantStudyMetadata.getFiles().size() == 1) { header = variantStudyMetadata.getFiles().get(0).getHeader();
/** * Remove a variant file metadata of a given variant study metadata (from study ID). * * @param file File * @param studyId Study ID */ public void removeFile(VariantFileMetadata file, String studyId) { // Sanity check if (file == null) { logger.error("Variant file metadata is null."); return; } removeFile(file.getId(), studyId); }