/**
 * Reads the {@link VariantFileMetadata} of a variant file.
 *
 * <p>Convenience overload: forwards to the two-argument variant with a {@code null}
 * metadata argument. Accepted input formats are Avro, Json and VCF.
 *
 * @param input Input variant file (avro, json, vcf)
 * @return the {@link VariantFileMetadata} read from the file
 * @throws StorageEngineException if the format is not valid or there is an error reading
 */
public static VariantFileMetadata readVariantFileMetadata(Path input)
        throws StorageEngineException {
    return readVariantFileMetadata(input, null);
}
throws StorageEngineException { if (metadata == null) { metadata = createEmptyVariantFileMetadata(input); if (isTransformedVariants(input.toString())) { input = getMetaFromTransformedFile(input); if (isMetaFile(input.toString())) { try (InputStream inputStream = FileUtils.newInputStream(input)) { return VariantReaderUtils.readVariantFileMetadataFromJson(inputStream); } catch (IOException | RuntimeException e) { throw new StorageEngineException("Unable to read VariantSource", e); markStdin(); VariantReader reader = getVariantReader(input, metadata.toVariantStudyMetadata(""), stdin); try { metadata = VariantMetadataUtils.readVariantFileMetadata(reader, metadata); resetStdin();
/**
 * Builds a pipeline backed by the given MongoDB variant adaptor.
 *
 * <p>The parent constructor receives the configuration, the engine id, the adaptor and a
 * newly created {@link VariantReaderUtils}; the adaptor is additionally stored in this
 * class's own {@code dbAdaptor} field.
 *
 * @param configuration   storage configuration forwarded to the parent constructor
 * @param storageEngineId storage engine identifier forwarded to the parent constructor
 * @param dbAdaptor       MongoDB variant adaptor used by this pipeline
 */
public MongoDBVariantStoragePipeline(
        StorageConfiguration configuration,
        String storageEngineId,
        VariantMongoDBAdaptor dbAdaptor) {
    super(configuration, storageEngineId, dbAdaptor, new VariantReaderUtils());
    this.dbAdaptor = dbAdaptor;
}
/**
 * Strips the transformed-file suffix from a file name.
 *
 * <p>For a transformed variants file, everything from the character preceding the last
 * occurrence of {@code VARIANTS_FILE} onwards is removed. For a metadata file the same is
 * done with {@code METADATA_FILE}. Any other name is returned unchanged.
 *
 * @param variantsFile file name to process
 * @return the name without the transformed suffix, or the input itself when it is neither
 *         a transformed variants file nor a metadata file
 */
public static String getOriginalFromTransformedFile(String variantsFile) {
    if (isTransformedVariants(variantsFile)) {
        // "- 1" also drops the single character placed right before the marker.
        return variantsFile.substring(0, variantsFile.lastIndexOf(VARIANTS_FILE) - 1);
    }
    if (isMetaFile(variantsFile)) {
        return variantsFile.substring(0, variantsFile.lastIndexOf(METADATA_FILE) - 1);
    }
    return variantsFile;
}
/**
 * Selects and builds the variant reader matching the type of the input file.
 *
 * <p>The decision is taken on the file name only (json, avro or vcf, checked in that order).
 * When no study metadata is supplied, one is derived from an empty file metadata created
 * for {@code input}, using an empty study id. The {@code stdin} flag is forwarded to the
 * Avro and VCF readers; the JSON reader does not receive it.
 *
 * @param input    Input variant file (avro, json, vcf)
 * @param metadata Optional study metadata; may be {@code null}
 * @param stdin    Indicate if the file should be read from the Standard Input
 * @return VariantReader for the detected format
 * @throws StorageEngineException if the format is not valid or there is an error reading
 */
public static VariantReader getVariantReader(Path input, VariantStudyMetadata metadata, boolean stdin)
        throws StorageEngineException {
    final String name = input.getFileName().toString();
    final VariantStudyMetadata studyMetadata = metadata != null
            ? metadata
            : createEmptyVariantFileMetadata(input).toVariantStudyMetadata("");

    if (isJson(name)) {
        return getVariantJsonReader(input, studyMetadata);
    }
    if (isAvro(name)) {
        return getVariantAvroReader(input, studyMetadata, stdin);
    }
    if (isVcf(name)) {
        return getVariantVcfReader(input, studyMetadata, stdin);
    }
    throw variantInputNotSupported(input);
}
/**
 * Reads the {@link VariantFileMetadata} associated with the given input URI.
 *
 * <p>URIs without a scheme, or whose scheme starts with {@code "file"}, are resolved as
 * local paths and handled by {@link VariantReaderUtils}. For any other URI, the metadata
 * file name is derived with {@code getMetaFromTransformedFile}, opened through the
 * {@code FileSystem} obtained from {@code conf}, gunzipped and parsed as JSON.
 *
 * @param input URI of the variants file
 * @return the metadata read for that file
 * @throws StorageEngineException if the file system cannot be obtained or the metadata
 *                                file cannot be read
 */
@Override
public VariantFileMetadata readVariantFileMetadata(URI input) throws StorageEngineException {
    final String scheme = input.getScheme();
    if (scheme == null || scheme.startsWith("file")) {
        return VariantReaderUtils.readVariantFileMetadata(Paths.get(input.getPath()), null);
    }

    final Path metaPath = new Path(VariantReaderUtils.getMetaFromTransformedFile(input.toString()));

    // Kept as a separate step so a file-system failure keeps its own error message.
    final FileSystem fs;
    try {
        fs = FileSystem.get(conf);
    } catch (IOException e) {
        throw new StorageEngineException("Unable to get FileSystem", e);
    }

    try (InputStream inputStream = new GZIPInputStream(fs.open(metaPath))) {
        return VariantReaderUtils.readVariantFileMetadataFromJson(inputStream);
    } catch (IOException e) {
        throw new StorageEngineException("Unable to read VariantFileMetadata", e);
    }
}
}
/**
 * Builds a variant reader for the given input file without supplying any metadata.
 *
 * <p>Convenience overload: forwards to the two-argument variant with a {@code null}
 * metadata argument.
 *
 * @param input Input variant file (avro, json, vcf)
 * @return VariantReader for the input file
 * @throws StorageEngineException if the format is not valid or there is an error reading
 */
public static VariantReader getVariantReader(Path input)
        throws StorageEngineException {
    return getVariantReader(input, null);
}
final VariantFileMetadata metadata = VariantReaderUtils.readVariantFileMetadata(input, metadataTemplate, stdin); Path outputMetaFile = VariantReaderUtils.getMetaFromTransformedFile(outputVariantsFile); Pair<VCFHeader, VCFHeaderVersion> header = VariantReaderUtils.readHtsHeader(input, stdin); VariantSetStatsCalculator statsCalculator = new VariantSetStatsCalculator(studyId, metadata); taskSupplier = () -> new VariantAvroTransformTask(header.getKey(), header.getValue(), studyId, metadata, outputMetaFile, if (parser.equalsIgnoreCase(HTSJDK_PARSER)) { logger.info("Using HTSJDK to read variants."); Pair<VCFHeader, VCFHeaderVersion> header = VariantReaderUtils.readHtsHeader(input, stdin); VariantSetStatsCalculator statsCalculator = new VariantSetStatsCalculator(studyId, metadata); taskSupplier = () -> new VariantJsonTransformTask(header.getKey(), header.getValue(), studyId, metadata,
); VariantFileMetadata fileMetadata = variantStorageEngine.getVariantReaderUtils().readVariantFileMetadata(etlResult.getTransformResult()); checkTransformedVariants(etlResult.getTransformResult(), studyConfiguration, fileMetadata.getStats().getNumVariants()); VariantDBAdaptor dbAdaptor = variantStorageEngine.getDBAdaptor(); VariantReader reader = VariantReaderUtils.getVariantReader(Paths.get(etlResult.getTransformResult().getPath()), new VariantFileMetadata(fileId, "").toVariantStudyMetadata(String.valueOf(studyConfiguration.getStudyId())));
/**
 * Creates a {@link VariantJsonReader} for a JSON variants file.
 *
 * <p>The companion metadata file is located with {@code getMetaFromTransformedFile}; both
 * the variants file and the metadata file are handed to the reader as absolute path strings.
 *
 * @param input    JSON variants file
 * @param metadata study metadata passed to the reader
 * @return a reader over {@code input}
 * @throws StorageEngineException if {@code input} is not recognised as a JSON file
 */
protected static VariantJsonReader getVariantJsonReader(Path input, VariantStudyMetadata metadata)
        throws StorageEngineException {
    if (!isJson(input.toString())) {
        throw variantInputNotSupported(input);
    }
    final Path absoluteInput = input.toAbsolutePath();
    final Path metaFile = getMetaFromTransformedFile(absoluteInput);
    return new VariantJsonReader(metadata, absoluteInput.toString(), metaFile.toAbsolutePath().toString());
}
String fileName = input.getFileName().toString(); VariantFileMetadata fileMetadata = variantReaderUtils.readVariantFileMetadata(inputUri); fileMetadata.setId(String.valueOf(fileId)); if (VariantReaderUtils.isProto(fileName)) { ArchiveTableHelper helper = new ArchiveTableHelper(dbAdaptor.getGenomeHelper(), studyId, fileMetadata);
/**
 * {@link Path} flavour of {@code getMetaFromTransformedFile(String)}.
 *
 * <p>The path is converted to its string form, resolved by the String overload, and the
 * result is wrapped back into a {@link Path}.
 *
 * @param variantsFile path of the transformed variants file
 * @return path of the corresponding metadata file
 */
public static Path getMetaFromTransformedFile(Path variantsFile) {
    final String metaFileName = getMetaFromTransformedFile(variantsFile.toString());
    return Paths.get(metaFileName);
}
/**
 * Creates a {@link VariantAvroReader} for an Avro variants file.
 *
 * <p>The metadata file is always resolved from the absolute path of {@code input}. The
 * variants themselves are read from {@code System.in} when {@code stdin} is set, and from
 * the file otherwise.
 *
 * @param input    Avro variants file (also used to locate the metadata file)
 * @param metadata study metadata passed to the reader
 * @param stdin    whether the variants should be read from the Standard Input
 * @return a reader over the Avro variants
 * @throws StorageEngineException if {@code input} is not recognised as an Avro file
 */
protected static VariantAvroReader getVariantAvroReader(Path input, VariantStudyMetadata metadata, boolean stdin)
        throws StorageEngineException {
    if (!isAvro(input.toString())) {
        throw variantInputNotSupported(input);
    }
    final File metaFile = new File(getMetaFromTransformedFile(input.toAbsolutePath().toString()));
    if (stdin) {
        return new VariantAvroReader(System.in, metaFile, metadata);
    }
    return new VariantAvroReader(input.toAbsolutePath().toFile(), metaFile, metadata);
}
if (VariantReaderUtils.isVcf(input.toString())) { VariantVcfHtsjdkReader reader = new VariantVcfHtsjdkReader(input, fileMetadata.toVariantStudyMetadata(studyId), normalizer); dataReader = VariantReaderUtils.getVariantReader(input, fileMetadata.toVariantStudyMetadata(studyId));
/**
 * Reads the VCF header and its version from a VCF input using the HTSJDK-based reader.
 *
 * <p>When reading from the Standard Input, the stream is marked before reading and reset
 * once the header has been read successfully.
 *
 * <p>The reader is closed in a {@code finally} block once it has been opened, so a failure
 * in {@code pre()} or while fetching the header no longer leaves it open.
 *
 * @param input VCF input file
 * @param stdin whether the file should be read from the Standard Input
 * @return pair holding the VCF header and the VCF header version
 * @throws StorageEngineException if there is an error creating the reader or handling stdin
 */
public static Pair<VCFHeader, VCFHeaderVersion> readHtsHeader(Path input, boolean stdin) throws StorageEngineException {
    if (stdin) {
        markStdin();
    }
    VariantVcfHtsjdkReader vcfReader = VariantReaderUtils.getVariantVcfReader(input, null, stdin);

    final VCFHeader vcfHeader;
    final VCFHeaderVersion version;
    vcfReader.open();
    try {
        vcfReader.pre();
        vcfHeader = vcfReader.getVCFHeader();
        version = vcfReader.getVCFHeaderVersion();
    } finally {
        // Always release the reader, including when header parsing fails.
        vcfReader.close();
    }

    // As before, stdin is only rewound after a successful read.
    if (stdin) {
        resetStdin();
    }
    return Pair.of(vcfHeader, version);
}
/**
 * Checks file-name based format detection: one proto name, and several names that must or
 * must not be recognised as JSON.
 */
@Test
public void formatTest() {
    assertTrue(VariantReaderUtils.isProto("s1.genome.vcf.variants.proto.gz"));

    // Names that must be detected as JSON.
    for (String jsonName : new String[]{"fileName.json", "fileName.json.gz"}) {
        assertTrue(VariantReaderUtils.isJson(jsonName));
    }

    // Names that must NOT be detected as JSON.
    for (String otherName : new String[]{"fileName.vcf.gz", "fileName.json.vcf.gz", "json", "json.gz"}) {
        assertFalse(VariantReaderUtils.isJson(otherName));
    }
}
/**
 * Validates that the given name is a transformed variants file.
 *
 * @param file file name to validate
 * @throws IllegalArgumentException if {@code isTransformedVariants(file)} is false
 */
public static void checkTransformedVariants(String file) {
    if (isTransformedVariants(file)) {
        return;
    }
    throw new IllegalArgumentException("Not a valid transformed variants file : " + file);
}
/**
 * Creates a VCF reader for the given input file.
 *
 * <p>Convenience overload: forwards to the three-argument variant with {@code stdin}
 * set to {@code false}.
 *
 * @param input    VCF input file
 * @param metadata study metadata passed to the reader
 * @return the VCF reader
 */
public static VariantVcfHtsjdkReader getVariantVcfReader(
        Path input,
        VariantStudyMetadata metadata) {
    return getVariantVcfReader(input, metadata, false);
}
/**
 * Validates that the given name is a transformed variants metadata file.
 *
 * @param file file name to validate
 * @throws IllegalArgumentException if {@code isMetaFile(file)} is false
 */
public static void checkMetaFile(String file) {
    if (isMetaFile(file)) {
        return;
    }
    throw new IllegalArgumentException("Not a valid transformed variants metadata file : " + file);
}