@Test(dataProvider = "BCF2EncodingTestProviderBasicTypes") public void testBCF2EncodingVectors(final List<BCF2TypedValue> toEncode) throws IOException { for ( final BCF2TypedValue tv : toEncode ) { for ( final int length : Arrays.asList(2, 5, 10, 15, 20, 25) ) { BCF2Encoder encoder = new BCF2Encoder(); List<Object> expected = Collections.nCopies(length, tv.value); encoder.encodeTyped(expected, tv.type); BCF2Decoder decoder = new BCF2Decoder(encoder.getRecordBytes()); final Object decoded = decoder.decodeTypedValue(); Assert.assertTrue(decoded instanceof List); final List<Object> decodedList = (List<Object>)decoded; Assert.assertEquals(decodedList.size(), expected.size()); for ( Object decodedValue : decodedList ) myAssertEquals(tv, decodedValue); } } }
private final byte[] encodeRecord(final List<BCF2TypedValue> toEncode) throws IOException { BCF2Encoder encoder = new BCF2Encoder(); for ( final BCF2TypedValue tv : toEncode ) { if ( tv.isMissing() ) encoder.encodeTypedMissing(tv.type); else { final BCF2Type encodedType = encoder.encode(tv.value); if ( tv.type != null ) // only if we have an expectation Assert.assertEquals(encodedType, tv.type); } } // check output final byte[] record = encoder.getRecordBytes(); Assert.assertNotNull(record); Assert.assertTrue(record.length > 0); return record; }
public void testBCF2BasicTypesWithEncodeMe(final List<BCF2TypedValue> toEncode, final EncodeMe func) throws IOException { for ( final BCF2TypedValue tv : toEncode ) { BCF2Encoder encoder = new BCF2Encoder(); func.encode(encoder, tv); BCF2Decoder decoder = new BCF2Decoder(encoder.getRecordBytes()); final Object decoded = decoder.decodeTypedValue(); Assert.assertNotNull(decoded); Assert.assertFalse(decoded instanceof List); myAssertEquals(tv, decoded); } }
@Test(dataProvider = "IntArrays") public void testIntArrays(final List<Integer> ints) throws IOException { final BCF2Encoder encoder = new BCF2Encoder(); encoder.encodeTyped(ints, BCF2Type.INT16); final BCF2Decoder decoder = new BCF2Decoder(encoder.getRecordBytes()); final byte typeDescriptor = decoder.readTypeDescriptor(); // read the int[] with the low-level version final int size = decoder.decodeNumberOfElements(typeDescriptor); final int[] decoded = decoder.decodeIntArray(typeDescriptor, size); if ( isMissing(ints) ) { // we expect that the result is null in this case Assert.assertNull(decoded, "Encoded all missing values -- expected null"); } else { // we expect at least some values to come back Assert.assertTrue(decoded.length > 0, "Must have at least 1 element for non-null encoded data"); // check corresponding values for ( int i = 0; i < ints.size(); i++ ) { final Integer expected = ints.get(i); if ( expected == null ) { Assert.assertTrue(decoded.length <= i, "we expect decoded to be truncated for missing values"); } else { Assert.assertTrue(decoded.length > i, "we expected at least " + i + " values in decoded array"); Assert.assertEquals(decoded[i], (int)expected); } } } }
private byte[] buildSamplesData(final VariantContext vc) throws IOException { final BCF2Codec.LazyData lazyData = getLazyData(vc); // has critical side effects if ( lazyData != null ) { // we never decoded any data from this BCF file, so just pass it back return lazyData.bytes; } // we have to do work to convert the VC into a BCF2 byte stream final List<String> genotypeFields = vc.calcVCFGenotypeKeys(header); for ( final String field : genotypeFields ) { final BCF2FieldWriter.GenotypesWriter writer = fieldManager.getGenotypeFieldWriter(field); if ( writer == null ) errorUnexpectedFieldToWrite(vc, field, "FORMAT"); assert writer != null; writer.start(encoder, vc); for ( final String name : sampleNames ) { Genotype g = vc.getGenotype(name); if ( g == null ) g = GenotypeBuilder.createMissing(name, writer.nValuesPerGenotype); writer.addGenotype(encoder, vc, g); } writer.done(encoder, vc); } return encoder.getRecordBytes(); }
@Test(dataProvider = "BCF2EncodingTestProviderBasicTypes") public void testBCF2EncodingVectorsWithMissing(final List<BCF2TypedValue> toEncode) throws IOException { for ( final BCF2TypedValue tv : toEncode ) { if ( tv.type != BCF2Type.CHAR ) { for ( final int length : Arrays.asList(2, 5, 10, 15, 20, 25) ) { final byte td = BCF2Utils.encodeTypeDescriptor(1, tv.type); final BCF2Encoder encoder = new BCF2Encoder(); for ( int i = 0; i < length; i++ ) { encoder.encodeRawValue(i % 2 == 0 ? null : tv.value, tv.type); } final BCF2Decoder decoder = new BCF2Decoder(encoder.getRecordBytes()); for ( int i = 0; i < length; i++ ) { final Object decoded = decoder.decodeTypedValue(td); myAssertEquals(i % 2 == 0 ? new BCF2TypedValue(null, tv.type) : tv, decoded); } } } } }
private byte[] buildSamplesData(final VariantContext vc) throws IOException { final BCF2Codec.LazyData lazyData = getLazyData(vc); // has critical side effects if ( lazyData != null ) { // we never decoded any data from this BCF file, so just pass it back return lazyData.bytes; } // we have to do work to convert the VC into a BCF2 byte stream final List<String> genotypeFields = vc.calcVCFGenotypeKeys(header); for ( final String field : genotypeFields ) { final BCF2FieldWriter.GenotypesWriter writer = fieldManager.getGenotypeFieldWriter(field); if ( writer == null ) errorUnexpectedFieldToWrite(vc, field, "FORMAT"); assert writer != null; writer.start(encoder, vc); for ( final String name : sampleNames ) { Genotype g = vc.getGenotype(name); if ( g == null ) g = GenotypeBuilder.createMissing(name, writer.nValuesPerGenotype); writer.addGenotype(encoder, vc, g); } writer.done(encoder, vc); } return encoder.getRecordBytes(); }
private byte[] buildSitesData( VariantContext vc ) throws IOException { final int contigIndex = contigDictionary.get(vc.getContig()); if ( contigIndex == -1 ) throw new IllegalStateException(String.format("Contig %s not found in sequence dictionary from reference", vc.getContig())); // note use of encodeRawValue to not insert the typing byte encoder.encodeRawValue(contigIndex, BCF2Type.INT32); // pos. GATK is 1 based, BCF2 is 0 based encoder.encodeRawValue(vc.getStart() - 1, BCF2Type.INT32); // ref length. GATK is closed, but BCF2 is open so the ref length is GATK end - GATK start + 1 // for example, a SNP is in GATK at 1:10-10, which has ref length 10 - 10 + 1 = 1 encoder.encodeRawValue(vc.getEnd() - vc.getStart() + 1, BCF2Type.INT32); // qual if ( vc.hasLog10PError() ) encoder.encodeRawFloat((float) vc.getPhredScaledQual()); else encoder.encodeRawMissingValue(BCF2Type.FLOAT); // info fields final int nAlleles = vc.getNAlleles(); final int nInfo = vc.getAttributes().size(); final int nGenotypeFormatFields = getNGenotypeFormatFields(vc); final int nSamples = header.getNGenotypeSamples(); encoder.encodeRawInt((nAlleles << 16) | (nInfo & 0x0000FFFF), BCF2Type.INT32); encoder.encodeRawInt((nGenotypeFormatFields << 24) | (nSamples & 0x00FFFFF), BCF2Type.INT32); buildID(vc); buildAlleles(vc); buildFilter(vc); buildInfo(vc); return encoder.getRecordBytes(); }
private byte[] buildSitesData( VariantContext vc ) throws IOException { final int contigIndex = contigDictionary.get(vc.getChr()); if ( contigIndex == -1 ) throw new IllegalStateException(String.format("Contig %s not found in sequence dictionary from reference", vc.getChr())); // note use of encodeRawValue to not insert the typing byte encoder.encodeRawValue(contigIndex, BCF2Type.INT32); // pos. GATK is 1 based, BCF2 is 0 based encoder.encodeRawValue(vc.getStart() - 1, BCF2Type.INT32); // ref length. GATK is closed, but BCF2 is open so the ref length is GATK end - GATK start + 1 // for example, a SNP is in GATK at 1:10-10, which has ref length 10 - 10 + 1 = 1 encoder.encodeRawValue(vc.getEnd() - vc.getStart() + 1, BCF2Type.INT32); // qual if ( vc.hasLog10PError() ) encoder.encodeRawFloat((float) vc.getPhredScaledQual()); else encoder.encodeRawMissingValue(BCF2Type.FLOAT); // info fields final int nAlleles = vc.getNAlleles(); final int nInfo = vc.getAttributes().size(); final int nGenotypeFormatFields = getNGenotypeFormatFields(vc); final int nSamples = header.getNGenotypeSamples(); encoder.encodeRawInt((nAlleles << 16) | (nInfo & 0x0000FFFF), BCF2Type.INT32); encoder.encodeRawInt((nGenotypeFormatFields << 24) | (nSamples & 0x00FFFFF), BCF2Type.INT32); buildID(vc); buildAlleles(vc); buildFilter(vc); buildInfo(vc); return encoder.getRecordBytes(); }
private byte[] buildSitesData( VariantContext vc ) throws IOException { final int contigIndex = contigDictionary.get(vc.getContig()); if ( contigIndex == -1 ) throw new IllegalStateException(String.format("Contig %s not found in sequence dictionary from reference", vc.getContig())); // note use of encodeRawValue to not insert the typing byte encoder.encodeRawValue(contigIndex, BCF2Type.INT32); // pos. GATK is 1 based, BCF2 is 0 based encoder.encodeRawValue(vc.getStart() - 1, BCF2Type.INT32); // ref length. GATK is closed, but BCF2 is open so the ref length is GATK end - GATK start + 1 // for example, a SNP is in GATK at 1:10-10, which has ref length 10 - 10 + 1 = 1 encoder.encodeRawValue(vc.getEnd() - vc.getStart() + 1, BCF2Type.INT32); // qual if ( vc.hasLog10PError() ) encoder.encodeRawFloat((float) vc.getPhredScaledQual()); else encoder.encodeRawMissingValue(BCF2Type.FLOAT); // info fields final int nAlleles = vc.getNAlleles(); final int nInfo = vc.getAttributes().size(); final int nGenotypeFormatFields = getNGenotypeFormatFields(vc); final int nSamples = header.getNGenotypeSamples(); encoder.encodeRawInt((nAlleles << 16) | (nInfo & 0x0000FFFF), BCF2Type.INT32); encoder.encodeRawInt((nGenotypeFormatFields << 24) | (nSamples & 0x00FFFFF), BCF2Type.INT32); buildID(vc); buildAlleles(vc); buildFilter(vc); buildInfo(vc); return encoder.getRecordBytes(); }
private byte[] buildSamplesData(final VariantContext vc) throws IOException { final BCF2Codec.LazyData lazyData = getLazyData(vc); // has critical side effects if ( lazyData != null ) { // we never decoded any data from this BCF file, so just pass it back return lazyData.bytes; } // we have to do work to convert the VC into a BCF2 byte stream final List<String> genotypeFields = vc.calcVCFGenotypeKeys(header); for ( final String field : genotypeFields ) { final BCF2FieldWriter.GenotypesWriter writer = fieldManager.getGenotypeFieldWriter(field); if ( writer == null ) errorUnexpectedFieldToWrite(vc, field, "FORMAT"); assert writer != null; writer.start(encoder, vc); for ( final String name : sampleNames ) { Genotype g = vc.getGenotype(name); if ( g == null ) g = GenotypeBuilder.createMissing(name, writer.nValuesPerGenotype); writer.addGenotype(encoder, vc, g); } writer.done(encoder, vc); } return encoder.getRecordBytes(); }