/**
 * Records the type and count type of the given header line in {@code otherFieldsMap},
 * keyed by the line's key.
 *
 * @param line header line whose key, type and count type are stored
 * @return this configuration, so calls can be chained
 */
public Configuration configure(VCFCompoundHeaderLine line) {
    final String key = line.getKey();
    this.otherFieldsMap.put(
            key,
            Pair.of(line.getType(), line.getCountType()));
    return this;
}
/**
 * Decodes one raw string value according to the type declared by its header line.
 *
 * <p>A value equal to {@code VCFConstants.MISSING_VALUE_v4} decodes to {@code null}.
 * Character and String values are returned unchanged, Integer values are parsed with
 * {@link Integer#valueOf(String)}, Float values with {@link Double#valueOf(String)}, and
 * Flag values must be {@code true} (case-insensitive) or {@code "1"}.
 *
 * @param field  name of the field being decoded; only used in error messages
 * @param string raw text to decode
 * @param format header line whose declared type selects the decoding
 * @return the decoded value, or {@code null} for the missing-value marker
 * @throws TribbleException if a Flag value is false, a number cannot be parsed, or the
 *                          declared type is not handled
 */
private final Object decodeOne(final String field, final String string, final VCFCompoundHeaderLine format) {
    if ( string.equals(VCFConstants.MISSING_VALUE_v4) )
        return null;

    try {
        switch ( format.getType() ) {
            case Character:
            case String:
                return string;
            case Flag:
                // a flag that is present can only be true; anything else is malformed input
                if ( ! (Boolean.parseBoolean(string) || string.equals("1")) )
                    throw new TribbleException("VariantContext FLAG fields " + field + " cannot contain false values"
                            + " as seen at " + getContig() + ":" + getStart());
                return Boolean.TRUE;
            case Integer:
                return Integer.valueOf(string);
            case Float:
                return Double.valueOf(string);
            default:
                throw new TribbleException("Unexpected type for field " + field);
        }
    } catch (NumberFormatException e) {
        // keep the parse failure as the cause so the original stack trace is not lost
        throw new TribbleException("Could not decode field " + field + " with value " + string
                + " of declared type " + format.getType(), e);
    }
}
/**
 * Decodes one raw string value according to the type declared by its header line.
 *
 * <p>A value equal to {@code VCFConstants.MISSING_VALUE_v4} decodes to {@code null}.
 * Character and String values are returned unchanged, Integer values are parsed with
 * {@link Integer#valueOf(String)}, Float values with {@link Double#valueOf(String)}, and
 * Flag values must be {@code true} (case-insensitive) or {@code "1"}.
 *
 * @param field  name of the field being decoded; only used in error messages
 * @param string raw text to decode
 * @param format header line whose declared type selects the decoding
 * @return the decoded value, or {@code null} for the missing-value marker
 * @throws TribbleException if a Flag value is false, a number cannot be parsed, or the
 *                          declared type is not handled
 */
private final Object decodeOne(final String field, final String string, final VCFCompoundHeaderLine format) {
    if ( string.equals(VCFConstants.MISSING_VALUE_v4) )
        return null;

    try {
        switch ( format.getType() ) {
            case Character:
            case String:
                return string;
            case Flag:
                // a flag that is present can only be true; anything else is malformed input
                if ( ! (Boolean.parseBoolean(string) || string.equals("1")) )
                    throw new TribbleException("VariantContext FLAG fields " + field + " cannot contain false values"
                            + " as seen at " + getChr() + ":" + getStart());
                return Boolean.TRUE;
            case Integer:
                return Integer.valueOf(string);
            case Float:
                return Double.valueOf(string);
            default:
                throw new TribbleException("Unexpected type for field " + field);
        }
    } catch (NumberFormatException e) {
        // keep the parse failure as the cause so the original stack trace is not lost
        throw new TribbleException("Could not decode field " + field + " with value " + string
                + " of declared type " + format.getType(), e);
    }
}
/**
 * Decodes one raw string value according to the type declared by its header line.
 *
 * <p>A value equal to {@code VCFConstants.MISSING_VALUE_v4} decodes to {@code null}.
 * Character and String values are returned unchanged, Integer values are parsed with
 * {@link Integer#valueOf(String)}, Float values with {@link Double#valueOf(String)}, and
 * Flag values must be {@code true} (case-insensitive) or {@code "1"}.
 *
 * @param field  name of the field being decoded; only used in error messages
 * @param string raw text to decode
 * @param format header line whose declared type selects the decoding
 * @return the decoded value, or {@code null} for the missing-value marker
 * @throws TribbleException if a Flag value is false, a number cannot be parsed, or the
 *                          declared type is not handled
 */
private final Object decodeOne(final String field, final String string, final VCFCompoundHeaderLine format) {
    if ( string.equals(VCFConstants.MISSING_VALUE_v4) )
        return null;

    try {
        switch ( format.getType() ) {
            case Character:
            case String:
                return string;
            case Flag:
                // a flag that is present can only be true; anything else is malformed input
                if ( ! (Boolean.parseBoolean(string) || string.equals("1")) )
                    throw new TribbleException("VariantContext FLAG fields " + field + " cannot contain false values"
                            + " as seen at " + getContig() + ":" + getStart());
                return Boolean.TRUE;
            case Integer:
                return Integer.valueOf(string);
            case Float:
                return Double.valueOf(string);
            default:
                throw new TribbleException("Unexpected type for field " + field);
        }
    } catch (NumberFormatException e) {
        // keep the parse failure as the cause so the original stack trace is not lost
        throw new TribbleException("Could not decode field " + field + " with value " + string
                + " of declared type " + format.getType(), e);
    }
}
/** * Loop over the info field key / value pairs in this BCF2 file and decode them into the builder * * @param builder * @param numInfoFields */ private void decodeInfo( final VariantContextBuilder builder, final int numInfoFields ) throws IOException { if ( numInfoFields == 0 ) // fast path, don't bother doing any work if there are no fields return; final Map<String, Object> infoFieldEntries = new HashMap<String, Object>(numInfoFields); for ( int i = 0; i < numInfoFields; i++ ) { final String key = getDictionaryString(); Object value = decoder.decodeTypedValue(); final VCFCompoundHeaderLine metaData = VariantContextUtils.getMetaDataForField(header, key); if ( metaData.getType() == VCFHeaderLineType.Flag ) value = true; // special case for flags infoFieldEntries.put(key, value); } builder.attributes(infoFieldEntries); }
/** * Loop over the info field key / value pairs in this BCF2 file and decode them into the builder * * @param builder * @param numInfoFields */ private void decodeInfo( final VariantContextBuilder builder, final int numInfoFields ) throws IOException { if ( numInfoFields == 0 ) // fast path, don't bother doing any work if there are no fields return; final Map<String, Object> infoFieldEntries = new HashMap<String, Object>(numInfoFields); for ( int i = 0; i < numInfoFields; i++ ) { final String key = getDictionaryString(); Object value = decoder.decodeTypedValue(); final VCFCompoundHeaderLine metaData = VariantContextUtils.getMetaDataForField(header, key); if ( metaData.getType() == VCFHeaderLineType.Flag ) value = true; // special case for flags infoFieldEntries.put(key, value); } builder.attributes(infoFieldEntries); }
private final Map<String, Object> fullyDecodeAttributes(final Map<String, Object> attributes, final VCFHeader header, final boolean lenientDecoding) { final Map<String, Object> newAttributes = new HashMap<>(10); for ( final Map.Entry<String, Object> attr : attributes.entrySet() ) { final String field = attr.getKey(); if ( field.equals(VCFConstants.GENOTYPE_FILTER_KEY) ) continue; // gross, FT is part of the extended attributes final VCFCompoundHeaderLine format = VariantContextUtils.getMetaDataForField(header, field); final Object decoded = decodeValue(field, attr.getValue(), format); if ( decoded != null && ! lenientDecoding && format.getCountType() != VCFHeaderLineCount.UNBOUNDED && format.getType() != VCFHeaderLineType.Flag ) { // we expect exactly the right number of elements final int obsSize = decoded instanceof List ? ((List) decoded).size() : 1; final int expSize = format.getCount(this); if ( obsSize != expSize ) { throw new TribbleException.InvalidHeader("Discordant field size detected for field " + field + " at " + getContig() + ":" + getStart() + ". Field had " + obsSize + " values " + "but the header says this should have " + expSize + " values based on header record " + format); } } newAttributes.put(field, decoded); } return newAttributes; }
private final Map<String, Object> fullyDecodeAttributes(final Map<String, Object> attributes, final VCFHeader header, final boolean lenientDecoding) { final Map<String, Object> newAttributes = new HashMap<>(10); for ( final Map.Entry<String, Object> attr : attributes.entrySet() ) { final String field = attr.getKey(); if ( field.equals(VCFConstants.GENOTYPE_FILTER_KEY) ) continue; // gross, FT is part of the extended attributes final VCFCompoundHeaderLine format = VariantContextUtils.getMetaDataForField(header, field); final Object decoded = decodeValue(field, attr.getValue(), format); if ( decoded != null && ! lenientDecoding && format.getCountType() != VCFHeaderLineCount.UNBOUNDED && format.getType() != VCFHeaderLineType.Flag ) { // we expect exactly the right number of elements final int obsSize = decoded instanceof List ? ((List) decoded).size() : 1; final int expSize = format.getCount(this); if ( obsSize != expSize ) { throw new TribbleException.InvalidHeader("Discordant field size detected for field " + field + " at " + getContig() + ":" + getStart() + ". Field had " + obsSize + " values " + "but the header says this should have " + expSize + " values based on header record " + format); } } newAttributes.put(field, decoded); } return newAttributes; }
private final Map<String, Object> fullyDecodeAttributes(final Map<String, Object> attributes, final VCFHeader header, final boolean lenientDecoding) { final Map<String, Object> newAttributes = new HashMap<String, Object>(10); for ( final Map.Entry<String, Object> attr : attributes.entrySet() ) { final String field = attr.getKey(); if ( field.equals(VCFConstants.GENOTYPE_FILTER_KEY) ) continue; // gross, FT is part of the extended attributes final VCFCompoundHeaderLine format = VariantContextUtils.getMetaDataForField(header, field); final Object decoded = decodeValue(field, attr.getValue(), format); if ( decoded != null && ! lenientDecoding && format.getCountType() != VCFHeaderLineCount.UNBOUNDED && format.getType() != VCFHeaderLineType.Flag ) { // we expect exactly the right number of elements final int obsSize = decoded instanceof List ? ((List) decoded).size() : 1; final int expSize = format.getCount(this); if ( obsSize != expSize ) { throw new TribbleException.InvalidHeader("Discordant field size detected for field " + field + " at " + getChr() + ":" + getStart() + ". Field had " + obsSize + " values " + "but the header says this should have " + expSize + " values based on header record " + format); } } newAttributes.put(field, decoded); } return newAttributes; }
/** * Loop over the info field key / value pairs in this BCF2 file and decode them into the builder * * @param builder * @param numInfoFields */ @Requires("numInfoFields >= 0") private void decodeInfo( final VariantContextBuilder builder, final int numInfoFields ) throws IOException { if ( numInfoFields == 0 ) // fast path, don't bother doing any work if there are no fields return; final Map<String, Object> infoFieldEntries = new HashMap<String, Object>(numInfoFields); for ( int i = 0; i < numInfoFields; i++ ) { final String key = getDictionaryString(); Object value = decoder.decodeTypedValue(); final VCFCompoundHeaderLine metaData = VariantContextUtils.getMetaDataForField(header, key); if ( metaData.getType() == VCFHeaderLineType.Flag ) value = true; // special case for flags infoFieldEntries.put(key, value); } builder.attributes(infoFieldEntries); }
/**
 * Compares {@code line} with the standard line looked up under the same ID and returns
 * the standard one when they disagree on count type, fixed count or type (or on
 * description, when {@code REPAIR_BAD_DESCRIPTIONS} is set). Otherwise {@code line} is
 * returned unchanged. When {@code GeneralUtils.DEBUG_MODE_ENABLED} is set, the reasons for
 * a repair are printed to standard error.
 *
 * @param line header line to check
 * @return {@code line} itself, or the standard line that replaces it
 */
public T repair(final T line) {
    final T standard = get(line.getID(), false);
    if ( standard == null )
        return line; // nothing to compare against

    final boolean countTypeDiffers = line.getCountType() != standard.getCountType();
    // fixed counts are only compared when both lines agree on the count type
    final boolean countDiffers = line.isFixedCount() && ! countTypeDiffers && line.getCount() != standard.getCount();
    final boolean typeDiffers = line.getType() != standard.getType();
    final boolean descriptionDiffers = ! line.getDescription().equals(standard.getDescription());

    final boolean repairNeeded =
            countTypeDiffers || countDiffers || typeDiffers || (REPAIR_BAD_DESCRIPTIONS && descriptionDiffers);
    if ( ! repairNeeded )
        return line;

    if ( GeneralUtils.DEBUG_MODE_ENABLED ) {
        final StringBuilder report = new StringBuilder("Repairing standard header line for field ");
        report.append(line.getID()).append(" because");
        if ( countTypeDiffers )
            report.append(" -- count types disagree; header has ").append(line.getCountType())
                  .append(" but standard is ").append(standard.getCountType());
        if ( typeDiffers )
            report.append(" -- type disagree; header has ").append(line.getType())
                  .append(" but standard is ").append(standard.getType());
        if ( countDiffers )
            report.append(" -- counts disagree; header has ").append(line.getCount())
                  .append(" but standard is ").append(standard.getCount());
        if ( descriptionDiffers )
            report.append(" -- descriptions disagree; header has '").append(line.getDescription())
                  .append("' but standard is '").append(standard.getDescription()).append("'");
        System.err.println(report.toString());
    }

    return standard;
}
/**
 * Compares {@code line} with the standard line looked up under the same ID and returns
 * the standard one when they disagree on count type, fixed count or type (or on
 * description, when {@code REPAIR_BAD_DESCRIPTIONS} is set). Otherwise {@code line} is
 * returned unchanged. When {@code GeneralUtils.DEBUG_MODE_ENABLED} is set, the reasons for
 * a repair are printed to standard error.
 *
 * @param line header line to check
 * @return {@code line} itself, or the standard line that replaces it
 */
public T repair(final T line) {
    final T standard = get(line.getID(), false);
    if ( standard == null )
        return line; // nothing to compare against

    final boolean countTypeDiffers = line.getCountType() != standard.getCountType();
    // fixed counts are only compared when both lines agree on the count type
    final boolean countDiffers = line.isFixedCount() && ! countTypeDiffers && line.getCount() != standard.getCount();
    final boolean typeDiffers = line.getType() != standard.getType();
    final boolean descriptionDiffers = ! line.getDescription().equals(standard.getDescription());

    final boolean repairNeeded =
            countTypeDiffers || countDiffers || typeDiffers || (REPAIR_BAD_DESCRIPTIONS && descriptionDiffers);
    if ( ! repairNeeded )
        return line;

    if ( GeneralUtils.DEBUG_MODE_ENABLED ) {
        final StringBuilder report = new StringBuilder("Repairing standard header line for field ");
        report.append(line.getID()).append(" because");
        if ( countTypeDiffers )
            report.append(" -- count types disagree; header has ").append(line.getCountType())
                  .append(" but standard is ").append(standard.getCountType());
        if ( typeDiffers )
            report.append(" -- type disagree; header has ").append(line.getType())
                  .append(" but standard is ").append(standard.getType());
        if ( countDiffers )
            report.append(" -- counts disagree; header has ").append(line.getCount())
                  .append(" but standard is ").append(standard.getCount());
        if ( descriptionDiffers )
            report.append(" -- descriptions disagree; header has '").append(line.getDescription())
                  .append("' but standard is '").append(standard.getDescription()).append("'");
        System.err.println(report.toString());
    }

    return standard;
}
/**
 * Chooses the BCF2 field encoder for a header line.
 *
 * <p>When genotype encoders are requested, a field that has an accessor in
 * {@code intGenotypeFieldAccessors} gets an {@code IntArray} encoder and the
 * {@code VCFConstants.GENOTYPE_KEY} field gets a {@code GenericInts} encoder. Every other
 * case is decided by the line's declared type.
 *
 * @param line                    header line describing the field
 * @param encoder                 not used by this method
 * @param dict                    dictionary handed to the created encoder
 * @param createGenotypesEncoders whether the genotype-specific encoders are considered
 * @return the encoder selected for {@code line}
 * @throws IllegalArgumentException if the line's type is not handled
 */
private BCF2FieldEncoder createFieldEncoder(final VCFCompoundHeaderLine line,
                                            final BCF2Encoder encoder,
                                            final Map<String, Integer> dict,
                                            final boolean createGenotypesEncoders ) {
    if ( createGenotypesEncoders ) {
        if ( intGenotypeFieldAccessors.getAccessor(line.getID()) != null ) {
            // the int-array encoder is used regardless of the declared type; only warn about it
            if ( GeneralUtils.DEBUG_MODE_ENABLED ) {
                if ( line.getType() != VCFHeaderLineType.Integer )
                    System.err.println("Warning: field " + line.getID() + " expected to encode an integer but saw " + line.getType() + " for record " + line);
            }
            return new BCF2FieldEncoder.IntArray(line, dict);
        }
        if ( line.getID().equals(VCFConstants.GENOTYPE_KEY) )
            return new BCF2FieldEncoder.GenericInts(line, dict);
    }

    switch ( line.getType() ) {
        case Integer:
            // a single fixed integer gets the atomic encoder, anything else the generic one
            if ( line.isFixedCount() && line.getCount() == 1 )
                return new BCF2FieldEncoder.AtomicInt(line, dict);
            return new BCF2FieldEncoder.GenericInts(line, dict);
        case Float:
            return new BCF2FieldEncoder.Float(line, dict);
        case Flag:
            return new BCF2FieldEncoder.Flag(line, dict);
        case Character:
        case String:
            return new BCF2FieldEncoder.StringOrCharacter(line, dict);
        default:
            throw new IllegalArgumentException("Unexpected type for field " + line.getID());
    }
}
/**
 * Chooses the BCF2 field encoder for a header line.
 *
 * <p>When genotype encoders are requested, a field that has an accessor in
 * {@code intGenotypeFieldAccessors} gets an {@code IntArray} encoder and the
 * {@code VCFConstants.GENOTYPE_KEY} field gets a {@code GenericInts} encoder. Every other
 * case is decided by the line's declared type.
 *
 * @param line                    header line describing the field
 * @param encoder                 not used by this method
 * @param dict                    dictionary handed to the created encoder
 * @param createGenotypesEncoders whether the genotype-specific encoders are considered
 * @return the encoder selected for {@code line}
 * @throws IllegalArgumentException if the line's type is not handled
 */
private BCF2FieldEncoder createFieldEncoder(final VCFCompoundHeaderLine line,
                                            final BCF2Encoder encoder,
                                            final Map<String, Integer> dict,
                                            final boolean createGenotypesEncoders ) {
    if ( createGenotypesEncoders ) {
        if ( intGenotypeFieldAccessors.getAccessor(line.getID()) != null ) {
            // the int-array encoder is used regardless of the declared type; only warn about it
            if ( GeneralUtils.DEBUG_MODE_ENABLED ) {
                if ( line.getType() != VCFHeaderLineType.Integer )
                    System.err.println("Warning: field " + line.getID() + " expected to encode an integer but saw " + line.getType() + " for record " + line);
            }
            return new BCF2FieldEncoder.IntArray(line, dict);
        }
        if ( line.getID().equals(VCFConstants.GENOTYPE_KEY) )
            return new BCF2FieldEncoder.GenericInts(line, dict);
    }

    switch ( line.getType() ) {
        case Integer:
            // a single fixed integer gets the atomic encoder, anything else the generic one
            if ( line.isFixedCount() && line.getCount() == 1 )
                return new BCF2FieldEncoder.AtomicInt(line, dict);
            return new BCF2FieldEncoder.GenericInts(line, dict);
        case Float:
            return new BCF2FieldEncoder.Float(line, dict);
        case Flag:
            return new BCF2FieldEncoder.Flag(line, dict);
        case Character:
        case String:
            return new BCF2FieldEncoder.StringOrCharacter(line, dict);
        default:
            throw new IllegalArgumentException("Unexpected type for field " + line.getID());
    }
}
/**
 * Compares {@code line} with the standard line looked up under the same ID and returns
 * the standard one when they disagree on count type, fixed count or type (or on
 * description, when {@code REPAIR_BAD_DESCRIPTIONS} is set). Otherwise {@code line} is
 * returned unchanged. When {@code GeneralUtils.DEBUG_MODE_ENABLED} is set, the reasons for
 * a repair are printed to standard error.
 *
 * @param line header line to check
 * @return {@code line} itself, or the standard line that replaces it
 */
@Requires("line != null")
@Ensures({"result != null", "result.getID().equals(line.getID())"})
public T repair(final T line) {
    final T standard = get(line.getID(), false);
    if ( standard == null )
        return line; // nothing to compare against

    final boolean countTypeDiffers = line.getCountType() != standard.getCountType();
    // fixed counts are only compared when both lines agree on the count type
    final boolean countDiffers = line.isFixedCount() && ! countTypeDiffers && line.getCount() != standard.getCount();
    final boolean typeDiffers = line.getType() != standard.getType();
    final boolean descriptionDiffers = ! line.getDescription().equals(standard.getDescription());

    final boolean repairNeeded =
            countTypeDiffers || countDiffers || typeDiffers || (REPAIR_BAD_DESCRIPTIONS && descriptionDiffers);
    if ( ! repairNeeded )
        return line;

    if ( GeneralUtils.DEBUG_MODE_ENABLED ) {
        final StringBuilder report = new StringBuilder("Repairing standard header line for field ");
        report.append(line.getID()).append(" because");
        if ( countTypeDiffers )
            report.append(" -- count types disagree; header has ").append(line.getCountType())
                  .append(" but standard is ").append(standard.getCountType());
        if ( typeDiffers )
            report.append(" -- type disagree; header has ").append(line.getType())
                  .append(" but standard is ").append(standard.getType());
        if ( countDiffers )
            report.append(" -- counts disagree; header has ").append(line.getCount())
                  .append(" but standard is ").append(standard.getCount());
        if ( descriptionDiffers )
            report.append(" -- descriptions disagree; header has '").append(line.getDescription())
                  .append("' but standard is '").append(standard.getDescription()).append("'");
        System.err.println(report.toString());
    }

    return standard;
}
/**
 * Chooses the BCF2 field encoder for a header line.
 *
 * <p>When genotype encoders are requested, a field that has an accessor in
 * {@code intGenotypeFieldAccessors} gets an {@code IntArray} encoder and the
 * {@code VCFConstants.GENOTYPE_KEY} field gets a {@code GenericInts} encoder. Every other
 * case is decided by the line's declared type.
 *
 * @param line                    header line describing the field
 * @param encoder                 not used by this method
 * @param dict                    dictionary handed to the created encoder
 * @param createGenotypesEncoders whether the genotype-specific encoders are considered
 * @return the encoder selected for {@code line}
 * @throws IllegalArgumentException if the line's type is not handled
 */
private BCF2FieldEncoder createFieldEncoder(final VCFCompoundHeaderLine line,
                                            final BCF2Encoder encoder,
                                            final Map<String, Integer> dict,
                                            final boolean createGenotypesEncoders ) {
    if ( createGenotypesEncoders ) {
        if ( intGenotypeFieldAccessors.getAccessor(line.getID()) != null ) {
            // the int-array encoder is used regardless of the declared type; only warn about it
            if ( GeneralUtils.DEBUG_MODE_ENABLED ) {
                if ( line.getType() != VCFHeaderLineType.Integer )
                    System.err.println("Warning: field " + line.getID() + " expected to encode an integer but saw " + line.getType() + " for record " + line);
            }
            return new BCF2FieldEncoder.IntArray(line, dict);
        }
        if ( line.getID().equals(VCFConstants.GENOTYPE_KEY) )
            return new BCF2FieldEncoder.GenericInts(line, dict);
    }

    switch ( line.getType() ) {
        case Integer:
            // a single fixed integer gets the atomic encoder, anything else the generic one
            if ( line.isFixedCount() && line.getCount() == 1 )
                return new BCF2FieldEncoder.AtomicInt(line, dict);
            return new BCF2FieldEncoder.GenericInts(line, dict);
        case Float:
            return new BCF2FieldEncoder.Float(line, dict);
        case Flag:
            return new BCF2FieldEncoder.Flag(line, dict);
        case Character:
        case String:
            return new BCF2FieldEncoder.StringOrCharacter(line, dict);
        default:
            throw new IllegalArgumentException("Unexpected type for field " + line.getID());
    }
}
return line.getType().equals(VCFHeaderLineType.String) && line.getCount()==1 ; else if(id.equals(VCFConstants.GENOTYPE_QUALITY_KEY)) return line.getType().equals(VCFHeaderLineType.Integer) && line.getCount()==1; else if(id.equals(VCFConstants.DEPTH_KEY)) return line.getType().equals(VCFHeaderLineType.Integer) && line.getCount()==1; else if(id.equals(VCFConstants.GENOTYPE_PL_KEY)) return line.getType().equals(VCFHeaderLineType.Integer) && line.getCountType().equals(VCFHeaderLineCount.G); else if(id.equals(VCFConstants.GENOTYPE_ALLELE_DEPTHS)) return line.getType().equals(VCFHeaderLineType.Integer) && line.getCountType().equals(VCFHeaderLineCount.R); else if(id.equals(VCFConstants.GENOTYPE_FILTER_KEY)) return line.getType().equals(VCFHeaderLineType.String) && line.getCountType().equals(VCFHeaderLineCount.UNBOUNDED); else if(id.equals(VCFConstants.PHASE_QUALITY_KEY)) return line.getType().equals(VCFHeaderLineType.Float) && line.getCount()==1; else if(id.equals(VCFConstants.END_KEY)) return line.getType().equals(VCFHeaderLineType.Integer) && line.getCount()==1; else if(id.equals(VCFConstants.DBSNP_KEY)) return line.getType().equals(VCFHeaderLineType.Flag) && line.getCount()==0; else if(id.equals(VCFConstants.DEPTH_KEY)) return line.getType().equals(VCFHeaderLineType.Integer) && line.getCount()==1; else if(id.equals(VCFConstants.STRAND_BIAS_KEY)) return line.getType().equals(VCFHeaderLineType.Float) && line.getCount()==1; else if(id.equals(VCFConstants.ALLELE_FREQUENCY_KEY)) return line.getType().equals(VCFHeaderLineType.Float) && line.getCountType().equals(VCFHeaderLineCount.A); else if(id.equals(VCFConstants.ALLELE_COUNT_KEY)) return line.getType().equals(VCFHeaderLineType.Integer) && line.getCountType().equals(VCFHeaderLineCount.A); else if(id.equals(VCFConstants.ALLELE_NUMBER_KEY)) return line.getType().equals(VCFHeaderLineType.Integer) && line.getCount()==1; else if(id.equals(VCFConstants.MAPPING_QUALITY_ZERO_KEY)) return 
line.getType().equals(VCFHeaderLineType.Integer) && line.getCount()==1;
/**
 * Repairs a header containing only {@code cfg.original} and checks that the repaired line
 * (looked up as a FORMAT line first, then as an INFO line) matches {@code cfg.expectedResult}
 * in ID, type and count type, and also in count when the count type is INTEGER.
 */
@Test(dataProvider = "RepairHeaderTest")
public void testRepairHeaderTest(final RepairHeaderTest cfg) {
    final VCFHeader repaired = VCFStandardHeaderLines.repairStandardHeaderLines(
            new VCFHeader(Collections.singleton((VCFHeaderLine) cfg.original)));

    final String id = cfg.original.getID();
    final VCFCompoundHeaderLine formatLine = (VCFCompoundHeaderLine) repaired.getFormatHeaderLine(id);
    // fall back to the INFO lines when the ID is not a FORMAT line
    final VCFCompoundHeaderLine repairedLine =
            formatLine != null ? formatLine : (VCFCompoundHeaderLine) repaired.getInfoHeaderLine(id);

    Assert.assertNotNull(repairedLine, "Repaired header didn't contain the expected line");
    Assert.assertEquals(repairedLine.getID(), cfg.expectedResult.getID());
    Assert.assertEquals(repairedLine.getType(), cfg.expectedResult.getType());
    Assert.assertEquals(repairedLine.getCountType(), cfg.expectedResult.getCountType());

    final boolean hasFixedCount = repairedLine.getCountType() == VCFHeaderLineCount.INTEGER;
    if ( hasFixedCount ) {
        Assert.assertEquals(repairedLine.getCount(), cfg.expectedResult.getCount());
    }
}
} // closes the enclosing class, whose header lies outside this excerpt
private VariantSetMetadata convert(VCFCompoundHeaderLine hl) { VariantSetMetadata vsm = convertHeaderLine((VCFHeaderLine) hl); vsm.setId(hl.getID()); if (hl.isFixedCount()) { vsm.setNumber(Integer.toString(hl.getCount())); } else { vsm.setNumber(hl.getCountType().name()); } vsm.setType(hl.getType().name()); vsm.setDescription(hl.getDescription()); // Empty for the moment Map<String, List<String>> infoMap = Collections.emptyMap(); vsm.setInfo(infoMap); return vsm; }
.setId(vcfLine.getID()) .setDescription(vcfLine.getDescription()) .setType(vcfLine.getType().toString()) .setNumber(number).build());