final SamFileValidator validator = new SamFileValidator(out, MAX_OPEN_TEMP_FILES); validator.setErrorsToIgnore(IGNORE); validator.setSkipMateValidation(SKIP_MATE_VALIDATION); validator.setBisulfiteSequenced(IS_BISULFITE_SEQUENCED); validator.setIgnoreWarnings(IGNORE_WARNINGS); validator.setVerbose(false, 0); } else { validator.setVerbose(true, MAX_OUTPUT); validator.setIndexValidationStringency(VALIDATE_INDEX ? IndexValidationStringency.EXHAUSTIVE : IndexValidationStringency.NONE); validator.validateBamFileTermination(INPUT); result = validator.validateSamFileSummary(samReader, reference); break; case VERBOSE: result = validator.validateSamFileVerbose(samReader, reference); break; return ReturnTypes.SUCCESSFUL.value(); // ran successfully with no warnings or errors } else { if (validator.getNumErrors() == 0) { if (validator.getNumWarnings() > 0) { return ReturnTypes.WARNINGS.value(); // warnings but no errors } else { if (validator.getNumWarnings() > 0) { return ReturnTypes.ERRORS_WARNINGS.value(); // errors and warnings
for (final SAMValidationError error : errors) { error.setRecordNumber(recordNumber); addError(error); validateMateFields(record, recordNumber); validateSortOrder(record, recordNumber); validateReadGroup(record, header); final boolean cigarIsValid = validateCigar(record, recordNumber); if (cigarIsValid) { validateNmTag(record, recordNumber); validateSecondaryBaseCalls(record, recordNumber); validateTags(record, recordNumber); if (sequenceDictionaryEmptyAndNoWarningEmitted && !record.getReadUnmappedFlag()) { addError(new SAMValidationError(Type.MISSING_SEQUENCE_DICTIONARY, "Sequence dictionary is empty", null)); sequenceDictionaryEmptyAndNoWarningEmitted = false; final FastqQualityFormat format = qualityDetector.generateBestGuess(QualityEncodingDetector.FileContext.SAM, FastqQualityFormat.Standard); if (format != FastqQualityFormat.Standard) { addError(new SAMValidationError(Type.INVALID_QUALITY_FORMAT, String.format("Detected %s quality score encoding, but expected %s.", format, FastqQualityFormat.Standard), null)); addError(new SAMValidationError(Type.INVALID_QUALITY_FORMAT, e.getMessage(), null)); throw new SAMException(msg, e); } catch (FileTruncatedException e) { addError(new SAMValidationError(Type.TRUNCATED_FILE, "File is truncated", null)); } finally { iter.close();
/**
 * Validates the records supplied by {@code samReader} and writes the validation error
 * details to {@code out}.
 * <p>
 * If validation is cut short by a {@code MaxOutputExceededException}, a closing notice
 * reporting the configured maximum is printed instead of propagating the exception.
 *
 * @param samReader records to validate
 * @param reference if null, NM tag validation is skipped
 * @return boolean true if there are no validation errors, otherwise false
 */
public boolean validateSamFileVerbose(final SamReader samReader, final ReferenceSequenceFile reference) {
    init(reference, samReader.getFileHeader());

    try {
        validateSamFile(samReader, out);
    } catch (MaxOutputExceededException e) {
        out.println("Maximum output of [" + maxVerboseOutput + "] errors reached.");
        // validateSamFile flushes `out` in its finally block, which runs before control
        // reaches this handler. Flush again so the notice above is not left sitting in
        // the writer's buffer when the caller supplied a buffered PrintWriter.
        out.flush();
    }

    // The outcome is read before cleanup() is invoked.
    final boolean result = errorsByType.isEmpty();
    cleanup();
    return result;
}
/**
 * Runs the BAM termination check on {@code file} and verifies that exactly one error of
 * {@code errorType} was recorded, together with the expected warning and error totals.
 */
@Test(dataProvider = "validateBamFileTerminationData")
public void validateBamFileTerminationTest(final File file,
                                           final SAMValidationError.Type errorType,
                                           final int numWarnings,
                                           final int numErrors) throws IOException {
    final PrintWriter stdout = new PrintWriter(System.out);
    final SamFileValidator validator = new SamFileValidator(stdout, 8000);

    validator.validateBamFileTermination(file);

    // One occurrence of the expected error type ...
    Assert.assertEquals(validator.getErrorsByType().get(errorType).getValue(), 1.0);
    // ... and the overall tallies match the data provider's expectations.
    Assert.assertEquals(validator.getNumWarnings(), numWarnings);
    Assert.assertEquals(validator.getNumErrors(), numErrors);
}
/**
 * Runs the header, record, unmatched-pair and (optionally) index checks against
 * {@code samReader}. Prints "No errors found" to {@code out} when nothing was recorded,
 * and always flushes {@code out} before returning or propagating an exception.
 *
 * @param samReader source of the header and records; its stringency is set to SILENT
 * @param out       destination for the "No errors found" line
 */
private void validateSamFile(final SAMFileReader samReader, final PrintWriter out) {
    try {
        samReader.setValidationStringency(ValidationStringency.SILENT);

        validateHeader(samReader.getFileHeader());
        orderChecker = new SAMSortOrderChecker(samReader.getFileHeader().getSortOrder());
        validateSamRecordsAndQualityFormat(samReader, samReader.getFileHeader());
        validateUnmatchedPairs();

        if (validateIndex) {
            try {
                BamIndexValidator.exhaustivelyTestIndex(samReader);
            } catch (Exception indexFailure) {
                // Any failure of the index check is reported as a validation error
                // rather than aborting the run.
                final String reason = indexFailure.getMessage();
                addError(new SAMValidationError(Type.INVALID_INDEX_FILE_POINTER, reason, null));
            }
        }

        final boolean clean = errorsByType.isEmpty();
        if (clean) {
            out.println("No errors found");
        }
    } finally {
        out.flush();
    }
}
/**
 * Runs the command line on {@code samFile}, then checks that the first record of the
 * cleaned output carries {@code expectedCigar} and that the cleaned file passes verbose
 * validation (warnings and MISSING_READ_GROUP errors are ignored).
 */
@Test(dataProvider = "testCleanSamDataProvider")
public void testCleanSam(final String samFile, final String expectedCigar) throws IOException {
    final File cleanedFile = File.createTempFile(samFile + ".", ".sam");
    cleanedFile.deleteOnExit();

    final String inputArg = "INPUT=" + new File(TEST_DATA_DIR, samFile).getAbsolutePath();
    final String outputArg = "OUTPUT=" + cleanedFile.getAbsolutePath();
    final String[] args = new String[]{inputArg, outputArg};
    Assert.assertEquals(runPicardCommandLine(args), 0);

    final SamFileValidator validator = new SamFileValidator(new PrintWriter(System.out), 8000);
    validator.setIgnoreWarnings(true);
    validator.setVerbose(true, 1000);
    validator.setErrorsToIgnore(Arrays.asList(SAMValidationError.Type.MISSING_READ_GROUP));

    // First pass: inspect only the first record's CIGAR.
    final SamReader cigarReader = SamReaderFactory.makeDefault()
            .validationStringency(ValidationStringency.LENIENT)
            .open(cleanedFile);
    final SAMRecord firstRecord = cigarReader.iterator().next();
    cigarReader.close();
    Assert.assertEquals(firstRecord.getCigarString(), expectedCigar);

    // Second pass: a fresh reader for validating the whole file.
    final SamReader validationReader = SamReaderFactory.makeDefault()
            .validationStringency(ValidationStringency.LENIENT)
            .open(cleanedFile);
    final boolean validated = validator.validateSamFileVerbose(validationReader, null);
    validationReader.close();
    Assert.assertTrue(validated, "ValidateSamFile failed");
}
/**
 * Runs a summary validation into a temporary report file and returns the histogram
 * parsed back from that report.
 *
 * @param samReader          records to validate
 * @param reference          reference passed through to the validator
 * @param stringency         index validation stringency to configure
 * @param ignoringError      error types the validator is told to ignore
 * @param skipMateValidation value passed to {@code setSkipMateValidation}
 * @return an empty histogram when the report's first line is "No errors found",
 *         otherwise the histogram read from the report
 * @throws IOException if the temporary report cannot be created, written or read
 */
private Histogram<String> executeValidationWithErrorIgnoring(final SamReader samReader,
                                                             final ReferenceSequenceFile reference,
                                                             final IndexValidationStringency stringency,
                                                             final Collection<SAMValidationError.Type> ignoringError,
                                                             final boolean skipMateValidation) throws IOException {
    final File outFile = File.createTempFile("validation", ".txt");
    outFile.deleteOnExit();

    // Close the writer before reading the report back: this guarantees everything the
    // validator printed has reached the file, and releases the handle even on failure.
    try (PrintWriter out = new PrintWriter(outFile)) {
        final SamFileValidator samFileValidator = new SamFileValidator(out, 8000);
        samFileValidator.setIndexValidationStringency(stringency).setErrorsToIgnore(ignoringError);
        samFileValidator.setSkipMateValidation(skipMateValidation);
        samFileValidator.validateSamFileSummary(samReader, reference);
    }

    try (LineNumberReader reader = new LineNumberReader(new FileReader(outFile))) {
        // Constant-first comparison: readLine() returns null for an empty report.
        if ("No errors found".equals(reader.readLine())) {
            return new Histogram<>();
        }
    }

    final MetricsFile<MetricBase, String> outputFile = new MetricsFile<>();
    try (FileReader metricsReader = new FileReader(outFile)) {
        outputFile.read(metricsReader);
    }
    Assert.assertNotNull(outputFile.getHistogram());
    return outputFile.getHistogram();
}
@Test public void testVerbose() throws IOException { final SAMRecordSetBuilder samBuilder = new SAMRecordSetBuilder(); for (int i = 0; i < 20; i++) { samBuilder.addFrag(String.valueOf(i), 1, i, false); } for (final SAMRecord record : samBuilder) { record.setProperPairFlag(true); } final StringWriter results = new StringWriter(); final SamFileValidator validator = new SamFileValidator(new PrintWriter(results), 8000); validator.setVerbose(true, 10); validator.validateSamFileVerbose(samBuilder.getSamReader(), null); final int lineCount = results.toString().split("\n").length; Assert.assertEquals(lineCount, 11); // 1 extra message added to indicate maximum number of errors Assert.assertEquals(validator.getNumErrors(), 6); Assert.assertEquals(validator.getNumWarnings(), 4); }
SamFileValidator v = new SamFileValidator(new PrintWriter(System.out), 1); final SamReader reader = SamReaderFactory.make().referenceSequence(params.reference).open(params.cramFile); List<SAMValidationError.Type> errors = new ArrayList<SAMValidationError.Type>(); errors.add(SAMValidationError.Type.MATE_NOT_FOUND); v.setErrorsToIgnore(errors); v.validateSamFileSummary(reader, ReferenceSequenceFileFactory.getReferenceSequenceFile(params.reference)); log.info("Elapsed seconds: " + progress.getElapsedSeconds());
/**
 * Runs the command line for lane 1 with a custom sequencing center, expects a zero
 * return code, runs a summary validation over the output and compares the output with
 * the expected SAM file from the test data directory.
 */
@Test
public void testNonBarcodedWithCenter() throws Exception {
    final File outputBam = File.createTempFile("nonBarcodedDescriptionNonBI.", ".sam");
    outputBam.deleteOnExit();
    final int lane = 1;

    final String[] args = new String[]{
            "BASECALLS_DIR=" + BASECALLS_DIR,
            "LANE=" + lane,
            "READ_STRUCTURE=25S8S25T",
            "OUTPUT=" + outputBam,
            "RUN_BARCODE=HiMom",
            "SAMPLE_ALIAS=HiDad",
            "SEQUENCING_CENTER=TEST_CENTER123",
            "LIBRARY_NAME=Hello, World"
    };
    Assert.assertEquals(runPicardCommandLine(args), 0);

    final PrintWriter stdout = new PrintWriter(System.out);
    final SamFileValidator validator = new SamFileValidator(stdout, 100);
    validator.validateSamFileSummary(SamReaderFactory.makeDefault().open(outputBam), null);

    final File expected = new File(TEST_DATA_DIR, "nonBarcodedDescriptionNonBI.sam");
    IOUtil.assertFilesEqual(outputBam, expected);
}
/**
 * Adds a TAG_VALUE_TOO_LARGE error for every attribute of the record whose value is a Long.
 *
 * @param record       record whose attributes are inspected
 * @param recordNumber record number attached to each reported error
 */
private void validateTags(final SAMRecord record, final long recordNumber) {
    for (final SAMRecord.SAMTagAndValue attribute : record.getAttributes()) {
        if (!(attribute.value instanceof Long)) {
            continue;
        }
        final String message = "Numeric value too large for tag " + attribute.tag;
        addError(new SAMValidationError(Type.TAG_VALUE_TOO_LARGE, message, record.getReadName(), recordNumber));
    }
}
/**
 * Validates mate information for a paired, primary record.
 * <p>
 * Unpaired and secondary/supplementary records are ignored. Otherwise the mate CIGAR is
 * validated. The record is then either stored until its mate turns up (keyed by mate
 * reference index and read name) or, if an entry is already stored under this record's
 * reference index and read name, validated against that entry.
 *
 * @param record       record to check
 * @param recordNumber position of the record, kept with the stored pair information
 */
private void validateMateFields(final SAMRecord record, final long recordNumber) {
    final boolean pairedPrimary = record.getReadPairedFlag() && !record.isSecondaryOrSupplementary();
    if (!pairedPrimary) {
        return;
    }

    validateMateCigar(record, recordNumber);

    final PairEndInfo storedEnd = pairEndInfoByName.remove(record.getReferenceIndex(), record.getReadName());
    if (storedEnd == null) {
        // No entry stored for this read name yet: keep this end for later.
        pairEndInfoByName.put(record.getMateReferenceIndex(), record.getReadName(), new PairEndInfo(record, recordNumber));
        return;
    }

    final PairEndInfo thisEnd = new PairEndInfo(record, recordNumber);
    for (final SAMValidationError mateError : storedEnd.validateMates(thisEnd, record.getReadName())) {
        addError(mateError);
    }
}
/**
 * Checks the NM (nucleotide differences) tag of a mapped record.
 * <p>
 * Unmapped records are skipped. A missing tag is reported as MISSING_TAG_NM. When a
 * reference walker is available, the tag is compared with the value computed from the
 * reference bases and a mismatch is reported as INVALID_TAG_NM.
 *
 * @param record       record to check
 * @param recordNumber record number attached to any reported error
 */
private void validateNmTag(final SAMRecord record, final long recordNumber) {
    if (record.getReadUnmappedFlag()) {
        return;
    }

    final Integer nmFromTag = record.getIntegerAttribute(ReservedTagConstants.NM);
    if (nmFromTag == null) {
        addError(new SAMValidationError(
                Type.MISSING_TAG_NM,
                "NM tag (nucleotide differences) is missing",
                record.getReadName(),
                recordNumber));
        return;
    }

    if (refFileWalker == null) {
        // Without a reference walker the tag value cannot be cross-checked.
        return;
    }

    final ReferenceSequence refSequence = refFileWalker.get(record.getReferenceIndex());
    final int nmFromReference =
            SequenceUtil.calculateSamNmTag(record, refSequence.getBases(), 0, isBisulfiteSequenced());
    if (nmFromTag.equals(nmFromReference)) {
        return;
    }

    addError(new SAMValidationError(
            Type.INVALID_TAG_NM,
            "NM tag (nucleotide differences) in file [" + nmFromTag
                    + "] does not match reality [" + nmFromReference + "]",
            record.getReadName(),
            recordNumber));
}
/** * @deprecated use {@link #setIndexValidationStringency} instead */ @Deprecated public SamFileValidator setValidateIndex(final boolean validateIndex) { // The SamReader must also have IndexCaching enabled to have the index validated, return this.setIndexValidationStringency(validateIndex ? IndexValidationStringency.EXHAUSTIVE : IndexValidationStringency.NONE); }
protected void test() { try { final SamFileValidator validator = new SamFileValidator(new PrintWriter(System.out), 8000); validator.setIgnoreWarnings(true); validator.setVerbose(true, 1000); validator.setErrorsToIgnore(Arrays.asList(SAMValidationError.Type.MISSING_READ_GROUP)); SamReaderFactory factory = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.LENIENT); SamReader samReader = factory.open(getOutput()); final boolean validated = validator.validateSamFileVerbose(samReader, null); CloserUtil.close(samReader);
/**
 * Runs the header, record, unmatched-pair and index checks against {@code samReader}.
 * The index check is skipped when the configured stringency is NONE. Prints
 * "No errors found" to {@code out} when nothing was recorded, and always flushes
 * {@code out} before returning or propagating an exception.
 *
 * @param samReader source of the header and records
 * @param out       destination for the "No errors found" line
 */
private void validateSamFile(final SamReader samReader, final PrintWriter out) {
    try {
        validateHeader(samReader.getFileHeader());
        orderChecker = new SAMSortOrderChecker(samReader.getFileHeader().getSortOrder());
        validateSamRecordsAndQualityFormat(samReader, samReader.getFileHeader());
        validateUnmatchedPairs();

        final boolean checkIndex = indexValidationStringency != IndexValidationStringency.NONE;
        if (checkIndex) {
            try {
                // Anything other than LESS_EXHAUSTIVE gets the exhaustive test.
                if (indexValidationStringency != IndexValidationStringency.LESS_EXHAUSTIVE) {
                    BamIndexValidator.exhaustivelyTestIndex(samReader);
                } else {
                    BamIndexValidator.lessExhaustivelyTestIndex(samReader);
                }
            } catch (Exception indexFailure) {
                // Any failure of the index check is reported as a validation error
                // rather than aborting the run.
                final String reason = indexFailure.getMessage();
                addError(new SAMValidationError(Type.INVALID_INDEX_FILE_POINTER, reason, null));
            }
        }

        final boolean clean = errorsByType.isEmpty();
        if (clean) {
            out.println("No errors found");
        }
    } finally {
        out.flush();
    }
}
/**
 * Runs the command line with the given read structure, optional molecular-index tags and
 * optional extra arguments, and checks the return code. When an expected SAM file is
 * supplied, the output is also summary-validated and compared with it.
 */
@Test(dataProvider = "molecularBarcodeData")
public void testMolecularBarcodes(final String readStructure,
                                  final String[] umiTags,
                                  final String[] extraArgs,
                                  final String expectedSam,
                                  final int expectedReturn) throws Exception {
    final File outputBam = File.createTempFile("molecularBarcodeTest.", ".sam");
    outputBam.deleteOnExit();
    final int lane = 1;

    List<String> args = new ArrayList<>(CollectionUtil.makeList(
            "BASECALLS_DIR=" + BASECALLS_DIR,
            "LANE=" + lane,
            "READ_STRUCTURE=" + readStructure,
            "OUTPUT=" + outputBam,
            "RUN_BARCODE=HiMom",
            "SAMPLE_ALIAS=HiDad",
            "SEQUENCING_CENTER=BI",
            "LIBRARY_NAME=Hello, World"));

    // One TAG_PER_MOLECULAR_INDEX argument per supplied tag, in order.
    if (umiTags != null) {
        for (final String tag : umiTags) {
            args.add("TAG_PER_MOLECULAR_INDEX=" + tag);
        }
    }
    if (extraArgs != null) {
        args.addAll(Arrays.asList(extraArgs));
    }

    Assert.assertEquals(runPicardCommandLine(args), expectedReturn);

    if (expectedSam == null) {
        // No expected output to compare against.
        return;
    }

    final PrintWriter stdout = new PrintWriter(System.out);
    final SamFileValidator validator = new SamFileValidator(stdout, 100);
    validator.validateSamFileSummary(SamReaderFactory.makeDefault().open(outputBam), null);

    final File expected = new File(TEST_DATA_DIR, expectedSam);
    IOUtil.assertFilesEqual(outputBam, expected);
}
/**
 * Checks how a block-compressed file is terminated and records an error for a defective
 * last gzip block (TRUNCATED_FILE) or a missing terminator block
 * (BAM_FILE_MISSING_TERMINATOR_BLOCK). Files that are not block compressed are ignored.
 *
 * @param inputFile file to inspect
 * @throws SAMException wrapping any IOException raised while inspecting the file
 */
public void validateBamFileTermination(final File inputFile) {
    final BlockCompressedInputStream.FileTermination terminationState;
    try {
        if (!IOUtil.isBlockCompressed(inputFile.toPath())) {
            return;
        }
        terminationState = BlockCompressedInputStream.checkTermination(inputFile);
    } catch (IOException e) {
        throw new SAMException("IOException", e);
    }

    final boolean defective =
            terminationState.equals(BlockCompressedInputStream.FileTermination.DEFECTIVE);
    if (defective) {
        addError(new SAMValidationError(Type.TRUNCATED_FILE,
                "BAM file has defective last gzip block",
                inputFile.getPath()));
        return;
    }

    final boolean missingTerminator =
            terminationState.equals(BlockCompressedInputStream.FileTermination.HAS_HEALTHY_LAST_BLOCK);
    if (missingTerminator) {
        addError(new SAMValidationError(Type.BAM_FILE_MISSING_TERMINATOR_BLOCK,
                "Older BAM file -- does not have terminator block",
                inputFile.getPath()));
    }
}
/**
 * Validates mate information for a paired, primary record.
 * <p>
 * Unpaired and secondary/supplementary records are ignored. The mate CIGAR is always
 * validated. Unless mate validation is switched off, the record is then either stored
 * until its mate turns up (keyed by mate reference index and read name) or, if an entry
 * is already stored under this record's reference index and read name, validated
 * against that entry.
 *
 * @param record       record to check
 * @param recordNumber position of the record, kept with the stored pair information
 */
private void validateMateFields(final SAMRecord record, final long recordNumber) {
    final boolean pairedPrimary = record.getReadPairedFlag() && !record.isSecondaryOrSupplementary();
    if (!pairedPrimary) {
        return;
    }

    validateMateCigar(record, recordNumber);

    if (skipMateValidation) {
        // Only the mate CIGAR check runs when mate validation is skipped.
        return;
    }

    final PairEndInfo storedEnd = pairEndInfoByName.remove(record.getReferenceIndex(), record.getReadName());
    if (storedEnd == null) {
        // No entry stored for this read name yet: keep this end for later.
        pairEndInfoByName.put(record.getMateReferenceIndex(), record.getReadName(), new PairEndInfo(record, recordNumber));
        return;
    }

    final PairEndInfo thisEnd = new PairEndInfo(record, recordNumber);
    for (final SAMValidationError mateError : storedEnd.validateMates(thisEnd, record.getReadName())) {
        addError(mateError);
    }
}
/**
 * Checks the NM (nucleotide differences) tag of a mapped record.
 * <p>
 * Unmapped records are skipped. A missing tag is reported as MISSING_TAG_NM. When a
 * reference walker is available, the tag is compared with the value computed from the
 * reference bases and a mismatch is reported as INVALID_TAG_NM.
 *
 * @param record       record to check
 * @param recordNumber record number attached to any reported error
 */
private void validateNmTag(final SAMRecord record, final long recordNumber) {
    if (record.getReadUnmappedFlag()) {
        return;
    }

    final Integer declaredDiffs = record.getIntegerAttribute(ReservedTagConstants.NM);
    if (declaredDiffs == null) {
        addError(new SAMValidationError(
                Type.MISSING_TAG_NM,
                "NM tag (nucleotide differences) is missing",
                record.getReadName(),
                recordNumber));
        return;
    }

    if (refFileWalker == null) {
        // Without a reference walker the tag value cannot be cross-checked.
        return;
    }

    final ReferenceSequence refSequence = refFileWalker.get(record.getReferenceIndex());
    final int computedDiffs =
            SequenceUtil.calculateSamNmTag(record, refSequence.getBases(), 0, isBisulfiteSequenced());
    if (declaredDiffs.equals(computedDiffs)) {
        return;
    }

    addError(new SAMValidationError(
            Type.INVALID_TAG_NM,
            "NM tag (nucleotide differences) in file [" + declaredDiffs
                    + "] does not match reality [" + computedDiffs + "]",
            record.getReadName(),
            recordNumber));
}