/** * Remove a variant study metadata (from study ID). * * @param studyId Study ID */ public void removeVariantStudyMetadata(String studyId) { // Sanity check if (StringUtils.isEmpty(studyId)) { logger.error("Variant study metadata ID {} is null or empty.", studyId); return; } VariantStudyMetadata variantStudyMetadata = getVariantStudyMetadata(studyId); if (variantStudyMetadata == null) { logger.error("Dataset not found. Check your study ID: '{}'", studyId); return; } for (int i = 0; i < variantMetadata.getStudies().size(); i++) { if (studyId.equals(variantMetadata.getStudies().get(i).getId())) { variantMetadata.getStudies().remove(i); return; } } }
public VariantMetadataManager(Species species, String description) { variantMetadata = new VariantMetadata(); variantMetadata.setCreationDate(LocalDateTime.now().toString()); variantMetadata.setSpecies(species); variantMetadata.setDescription(description); mapper = new ObjectMapper(); mapper.setSerializationInclusion(JsonInclude.Include.NON_NULL); mapper.configure(MapperFeature.REQUIRE_SETTERS_FOR_GETTERS, true); logger = LoggerFactory.getLogger(VariantMetadataManager.class); }
/**
 * Look up a variant study metadata by its study ID.
 * <p>
 * When the ID is non-null and the metadata has no study list yet, an empty list is
 * created as a side effect. A null ID is logged as an error.
 *
 * @param studyId Study ID
 * @return VariantStudyMetadata object with that ID, or null if the ID is null or unknown
 */
public VariantStudyMetadata getVariantStudyMetadata(String studyId) {
    if (studyId == null) {
        logger.error("Study ID is null");
        return null;
    }

    // Lazily create the study list so the scan below never sees null
    if (variantMetadata.getStudies() == null) {
        variantMetadata.setStudies(new ArrayList<>());
    }

    for (VariantStudyMetadata candidate : variantMetadata.getStudies()) {
        if (studyId.equals(candidate.getId())) {
            return candidate;
        }
    }
    return null;
}
/**
 * Build a VariantMetadata record from the state of this builder.
 * <p>
 * For each of the five fields (version, species, creationDate, description, studies) the
 * value held by the builder is used when the corresponding field-set flag is true;
 * otherwise the field's default value is used.
 *
 * @return the newly created record
 * @throws org.apache.avro.AvroRuntimeException wrapping any exception raised while building
 */
@Override
public VariantMetadata build() {
    try {
        VariantMetadata record = new VariantMetadata();
        // Index i in fieldSetFlags()/fields() addresses the i-th field, in the order assigned below
        record.version = fieldSetFlags()[0] ? this.version : (java.lang.String) defaultValue(fields()[0]);
        record.species = fieldSetFlags()[1] ? this.species : (org.opencb.biodata.models.metadata.Species) defaultValue(fields()[1]);
        record.creationDate = fieldSetFlags()[2] ? this.creationDate : (java.lang.String) defaultValue(fields()[2]);
        record.description = fieldSetFlags()[3] ? this.description : (java.lang.String) defaultValue(fields()[3]);
        record.studies = fieldSetFlags()[4] ? this.studies : (java.util.List<org.opencb.biodata.models.variant.metadata.VariantStudyMetadata>) defaultValue(fields()[4]);
        return record;
    } catch (Exception e) {
        // Any failure is rethrown unchecked, keeping the original exception as the cause
        throw new org.apache.avro.AvroRuntimeException(e);
    }
}
// Closes an enclosing scope opened outside this excerpt (presumably the builder class)
}
public VariantMetadata toVariantMetadata(Collection<StudyConfiguration> studyConfigurations, ProjectMetadata projectMetadata, Map<Integer, List<Integer>> returnedSamples, Map<Integer, List<Integer>> returnedFiles) { List<VariantStudyMetadata> studies = new ArrayList<>(); String specie = projectMetadata.getSpecies(); String assembly = projectMetadata.getAssembly(); for (StudyConfiguration studyConfiguration : studyConfigurations) { VariantStudyMetadata studyMetadata = toVariantStudyMetadata(studyConfiguration, returnedSamples == null ? null : returnedSamples.get(studyConfiguration.getStudyId()), returnedFiles == null ? null : returnedFiles.get(studyConfiguration.getStudyId())); studies.add(studyMetadata); } Species species = Species.newBuilder() .setId(specie) .setAssembly(assembly) .build(); return VariantMetadata.newBuilder() // .setDate(Date.from(Instant.now()).toString()) .setCreationDate(TimeUtils.getTime()) .setStudies(studies) .setVersion(GitRepositoryState.get().getDescribeShort()) .setSpecies(species) .build(); }
/**
 * Prepare the writer: build the VCF header from the first study of the metadata, open the
 * variant context writer on the output stream, write the header and create the converter.
 *
 * @return always true
 */
@Override
public boolean pre() {
    // Only the first study of the metadata is used
    String studyId = metadata.getStudies().get(0).getId();

    // Sample IDs of every individual in that study, in encounter order
    List<String> sampleIds = metadata.getStudies().get(0).getIndividuals().stream()
            .flatMap(individual -> individual.getSamples().stream().map(Sample::getId))
            .collect(Collectors.toList());

    VCFHeader header = new VariantStudyMetadataToVCFHeaderConverter()
            .convert(metadata.getStudies().get(0), annotations);

    variantContextWriter = VcfUtils.createVariantContextWriter(
            outputStream, header.getSequenceDictionary(), Options.ALLOW_MISSING_FIELDS_IN_HEADER);
    variantContextWriter.writeHeader(header);

    converter = newConverter(studyId, sampleIds, annotations);
    return true;
}
/** * Add a variant study metadata. Study ID must not exist. * * @param variantStudyMetadata Variant study metadata to add */ public void addVariantDatasetMetadata(VariantStudyMetadata variantStudyMetadata) { if (variantStudyMetadata != null) { VariantStudyMetadata found = getVariantStudyMetadata(variantStudyMetadata.getId()); // if there is not any study with that ID then we add the new one // TODO we need to think what to do when it exists, should we throw an exception? if (found == null) { if (variantMetadata.getStudies() == null) { variantMetadata.setStudies(new ArrayList<>()); } variantMetadata.getStudies().add(variantStudyMetadata); } else { logger.error("Study ID already exists"); } } }
/**
 * Print to the standard output a summary of the variant metadata manager: the number of
 * studies and, for each study, its files and cohorts together with their sample counts.
 * <p>
 * A study list, file list, cohort list or sample-ID list that is null is reported as
 * having zero elements instead of raising a NullPointerException.
 */
public void printSummary() {
    StringBuilder res = new StringBuilder();

    // The study list is not guaranteed to be initialised (other methods create it lazily)
    int numStudies = variantMetadata.getStudies() == null ? 0 : variantMetadata.getStudies().size();
    res.append("Num. studies: ").append(numStudies).append("\n");

    if (variantMetadata.getStudies() != null) {
        int counter;
        int studyCounter = 0;
        for (VariantStudyMetadata study : variantMetadata.getStudies()) {
            studyCounter++;
            res.append("\tStudy #").append(studyCounter).append(": ").append(study.getId()).append("\n");

            int numFiles = study.getFiles() == null ? 0 : study.getFiles().size();
            res.append("\tNum. files: ").append(numFiles).append("\n");
            if (study.getFiles() != null) {
                counter = 0;
                for (VariantFileMetadata file : study.getFiles()) {
                    counter++;
                    int numSamples = file.getSampleIds() == null ? 0 : file.getSampleIds().size();
                    res.append("\t\tFile #").append(counter).append(": ").append(file.getId());
                    res.append(" (").append(numSamples).append(" samples)\n");
                }
            }

            int numCohorts = study.getCohorts() == null ? 0 : study.getCohorts().size();
            res.append("\tNum. cohorts: ").append(numCohorts).append("\n");
            if (study.getCohorts() != null) {
                counter = 0;
                for (Cohort cohort : study.getCohorts()) {
                    counter++;
                    int numSamples = cohort.getSampleIds() == null ? 0 : cohort.getSampleIds().size();
                    res.append("\t\tCohort #").append(counter).append(": ").append(cohort.getId());
                    res.append(" (").append(numSamples).append(" samples)\n");
                }
            }
        }
    }
    System.out.println(res.toString());
}
if (queryOptions.getBoolean(BASIC_METADATA, false)) { for (VariantStudyMetadata variantStudyMetadata : metadata.getStudies()) { for (Cohort cohort : variantStudyMetadata.getCohorts()) { cohort.setSampleIds(Collections.emptyList()); .collect(Collectors.toMap(StudyConfiguration::getStudyName, StudyConfiguration::getStudyId)); try { for (VariantStudyMetadata studyMetadata : metadata.getStudies()) { String studyId = studyMetadata.getId(); fillStudy(studyId, studyMetadata);
VariantStudyMetadata studyMetadata = manager.getVariantMetadata().getStudies().get(0); exportPedMapFile(vd, studyMetadata, outDir + "/plink");
/** * Load variant metadata file. * * @param path Path to the variant metadata file * @throws IOException IOException */ public void load(Path path) throws IOException { FileUtils.checkPath(path); logger.debug("Loading variant metadata from '{}'", path.toAbsolutePath().toString()); variantMetadata = mapper.readValue(path.toFile(), VariantMetadata.class); // We need to add Individual info fields to their sample annotations to allow more complex queries for (VariantStudyMetadata variantStudyMetadata: variantMetadata.getStudies()) { if (variantStudyMetadata.getIndividuals() != null) { for (org.opencb.biodata.models.metadata.Individual individual : variantStudyMetadata.getIndividuals()) { for (Sample sample : individual.getSamples()) { sample.getAnnotations().put(INDIVIDUAL_ID, individual.getId()); sample.getAnnotations().put(INDIVIDUAL_FAMILY, individual.getFamily()); sample.getAnnotations().put(INDIVIDUAL_FATHER, individual.getFather()); sample.getAnnotations().put(INDIVIDUAL_MOTHER, individual.getMother()); sample.getAnnotations().put(INDIVIDUAL_SEX, individual.getSex()); sample.getAnnotations().put(INDIVIDUAL_PHENOTYPE, individual.getPhenotype()); } } } } }
/**
 * Build the VariantMetadata for the given studies and replace each study's file entries
 * with the file metadata obtained from {@code scm.variantFileMetadataIterator}.
 * <p>
 * For every study, the numeric file IDs are resolved from the study configuration, first by
 * the file's ID and, if that is not registered, by the file's path. When at least one file is
 * listed, the matching file metadata is queried and each result replaces the entry with the
 * same ID in the study's file list. The {@code queryOptions} argument is not read here.
 *
 * @param studyConfigurations Study configurations to convert
 * @param projectMetadata     Project metadata
 * @param returnedSamples     Sample IDs to return, keyed by study ID
 * @param returnedFiles       File IDs to return, keyed by study ID
 * @param queryOptions        Query options
 * @return the completed VariantMetadata
 * @throws StorageEngineException declared by the signature
 */
protected VariantMetadata makeVariantMetadata(List<StudyConfiguration> studyConfigurations, ProjectMetadata projectMetadata,
                                              Map<Integer, List<Integer>> returnedSamples,
                                              Map<Integer, List<Integer>> returnedFiles,
                                              QueryOptions queryOptions) throws StorageEngineException {
    VariantMetadataConverter metadataConverter = new VariantMetadataConverter();
    VariantMetadata metadata = metadataConverter
            .toVariantMetadata(studyConfigurations, projectMetadata, returnedSamples, returnedFiles);

    // Index the configurations by study name; study metadata IDs are looked up in this map
    Map<String, StudyConfiguration> configurationsByName = studyConfigurations.stream()
            .collect(Collectors.toMap(configuration -> configuration.getStudyName(), Function.identity()));

    for (VariantStudyMetadata studyMetadata : metadata.getStudies()) {
        StudyConfiguration studyConfiguration = configurationsByName.get(studyMetadata.getId());

        // Resolve each file to its numeric ID: by file ID first, by path as a fallback
        List<Integer> fileIds = studyMetadata.getFiles().stream()
                .map(file -> {
                    Integer byId = studyConfiguration.getFileIds().get(file.getId());
                    return byId != null ? byId : studyConfiguration.getFileIds().get(file.getPath());
                })
                .collect(Collectors.toList());

        if (fileIds == null || fileIds.isEmpty()) {
            continue;
        }

        Query query = new Query()
                .append(VariantFileMetadataDBAdaptor.VariantFileMetadataQueryParam.STUDY_ID.key(),
                        studyConfiguration.getStudyId())
                .append(VariantFileMetadataDBAdaptor.VariantFileMetadataQueryParam.FILE_ID.key(), fileIds);

        // Swap each existing entry for the queried one with the same ID
        scm.variantFileMetadataIterator(query, new QueryOptions()).forEachRemaining(stored -> {
            studyMetadata.getFiles().removeIf(existing -> existing.getId().equals(stored.getId()));
            studyMetadata.getFiles().add(stored.getImpl());
        });
    }
    return metadata;
}
/**
 * Create one StudyConfiguration per study found in the given variant metadata.
 * <p>
 * Numeric IDs are drawn from a single counter starting at 1 and shared by all studies: for
 * each study the study itself gets the next ID, then each of its samples, then each file,
 * then each cohort. Files are registered by path and marked as indexed; cohorts are marked as
 * having calculated stats. The aggregated header, aggregation and attributes of the study
 * are copied over.
 *
 * @param variantMetadata Variant metadata to convert
 * @return the list of study configurations, one per study, in the order of the metadata
 */
public List<StudyConfiguration> toStudyConfigurations(VariantMetadata variantMetadata) {
    List<StudyConfiguration> configurations = new ArrayList<>(variantMetadata.getStudies().size());
    int nextId = 1;
    VariantMetadataManager manager = new VariantMetadataManager().setVariantMetadata(variantMetadata);

    for (VariantStudyMetadata study : variantMetadata.getStudies()) {
        StudyConfiguration sc = new StudyConfiguration(nextId, study.getId());
        nextId++;
        configurations.add(sc);

        // Samples first, so that files and cohorts can refer to their IDs
        List<Sample> studySamples = manager.getSamples(study.getId());
        for (Sample sample : studySamples) {
            sc.getSampleIds().put(sample.getId(), nextId);
            nextId++;
        }

        for (VariantFileMetadata file : study.getFiles()) {
            int fileId = nextId;
            nextId++;
            sc.getIndexedFiles().add(fileId);
            sc.getFileIds().put(file.getPath(), fileId);
            List<Integer> fileSampleIds = toSampleIds(sc, file.getSampleIds());
            sc.getSamplesInFiles().put(fileId, new LinkedHashSet<>(fileSampleIds));
        }

        for (Cohort cohort : study.getCohorts()) {
            int cohortId = nextId;
            nextId++;
            sc.getCohortIds().put(cohort.getId(), cohortId);
            sc.getCalculatedStats().add(cohortId);
            sc.getCohorts().put(cohortId, new HashSet<>(toSampleIds(sc, cohort.getSampleIds())));
        }

        sc.setVariantHeader(study.getAggregatedHeader());
        sc.setAggregation(study.getAggregation());
        study.getAttributes().forEach(sc.getAttributes()::put);
    }
    return configurations;
}
VariantStudyMetadata studyMetadata = manager.getVariantMetadata().getStudies().get(0); VCFExporter vcfExporter = new VCFExporter(studyMetadata); vcfExporter.open(Paths.get(vcfFilename));
throw new IOException(e); if (!variantMetadata.getStudies().isEmpty()) { List<String> annotations = queryOptions.getAsStringList("annotations"); exporter = VcfDataWriter.newWriterForAvro(variantMetadata, annotations, outputStream);