/**
 * Create a protein from a string.
 * <p>
 * Delegates to the two-argument constructor, passing the shared compound set
 * returned by {@code AminoAcidCompoundSet.getAminoAcidCompoundSet()}.
 *
 * @param seqString the sequence text to build the protein from
 * @throws CompoundNotFoundException propagated from the delegated constructor
 *         (presumably when a symbol in {@code seqString} is not in the compound
 *         set - confirm against that constructor)
 */
public ProteinSequence(String seqString) throws CompoundNotFoundException {
    this(seqString, AminoAcidCompoundSet.getAminoAcidCompoundSet());
}
/**
 * Tells whether two one-letter symbols score positively against each other
 * in {@code matrix}.
 *
 * @param c1 first one-letter symbol
 * @param c2 second one-letter symbol
 * @return {@code true} if the value {@code matrix} reports for the pair is greater than zero
 */
private boolean isSimilar(char c1, char c2) {
    AminoAcidCompoundSet compounds = AminoAcidCompoundSet.getAminoAcidCompoundSet();

    // Resolve both characters to compounds through the shared compound set.
    AminoAcidCompound first = compounds.getCompoundForString(String.valueOf(c1));
    AminoAcidCompound second = compounds.getCompoundForString(String.valueOf(c2));

    short score = matrix.getValue(first, second);
    if (score > 0) {
        return true;
    }
    return false;
}
@Override public double getApliphaticIndex(ProteinSequence sequence) { // Aliphatic index = X(Ala) + a * X(Val) + b * ( X(Ile) + X(Leu) ) // where X(Ala), X(Val), X(Ile), and X(Leu) are mole percent (100 X mole fraction) // of alanine, valine, isoleucine, and leucine. // The coefficients a and b are the relative volume of valine side chain (a = 2.9) // and of Leu/Ile side chains (b = 3.9) to the side chain of alanine. // Ala => A, Val => V, Ile => I, Leu => L AminoAcidCompoundSet aaSet = new AminoAcidCompoundSet(); Map<AminoAcidCompound, Double> aa2Composition = getAAComposition(sequence); final double a = 2.9; final double b = 3.9; double xAla = aa2Composition.get(aaSet.getCompoundForString("A")); double xVal = aa2Composition.get(aaSet.getCompoundForString("V")); double xIle = aa2Composition.get(aaSet.getCompoundForString("I")); double xLeu = aa2Composition.get(aaSet.getCompoundForString("L")); return (xAla + (a * xVal) + (b * (xIle + xLeu))) * 100; }
/**
 * Computes the fraction of each amino acid compound in a protein sequence.
 * <p>
 * Every compound of the compound set gets an entry. Only characters contained in
 * {@code PeptideProperties.standardAASet} are counted, and each count is divided by the
 * number of counted characters. If no character is counted, every entry stays at 0.0.
 *
 * @param sequence the protein sequence to analyse
 * @return a map from each compound of the compound set to its fraction among the counted
 *         characters
 */
@Override
public Map<AminoAcidCompound, Double> getAAComposition(ProteinSequence sequence) {
    // Use the shared compound set rather than constructing a new one on every call.
    AminoAcidCompoundSet aaSet = AminoAcidCompoundSet.getAminoAcidCompoundSet();

    Map<AminoAcidCompound, Double> aa2Composition = new HashMap<AminoAcidCompound, Double>();
    for (AminoAcidCompound aa : aaSet.getAllCompounds()) {
        aa2Composition.put(aa, 0.0);
    }

    int validLength = 0;
    // NOTE(review): getSequence(..., true) presumably normalises the text (e.g. case) - confirm.
    char[] seq = this.getSequence(sequence.toString(), true);
    for (char aa : seq) {
        if (PeptideProperties.standardAASet.contains(aa)) {
            AminoAcidCompound compound = aaSet.getCompoundForString(String.valueOf(aa));
            aa2Composition.put(compound, aa2Composition.get(compound) + 1.0);
            validLength++;
        }
    }

    // With nothing counted, all entries are still the initial 0.0, so no reset is needed.
    if (validLength > 0) {
        for (AminoAcidCompound aa : aaSet.getAllCompounds()) {
            aa2Composition.put(aa, aa2Composition.get(aa) / validLength);
        }
    }
    return aa2Composition;
}
}
/**
 * Checks whether {@code compoundTwo} is among the equivalents of {@code compoundOne}.
 *
 * @param compoundOne the compound whose equivalents are looked up
 * @param compoundTwo the compound searched for in that set
 * @return {@code true} if an equivalents set exists for {@code compoundOne} and it contains
 *         {@code compoundTwo}; {@code false} otherwise
 */
@Override
public boolean compoundsEquivalent(AminoAcidCompound compoundOne, AminoAcidCompound compoundTwo) {
    Set<AminoAcidCompound> candidates = getEquivalentCompounds(compoundOne);
    if (candidates == null) {
        // No entry for compoundOne: nothing can be equivalent to it.
        return false;
    }
    return candidates.contains(compoundTwo);
}
@Override public Set<AminoAcidCompound> getEquivalentCompounds(AminoAcidCompound compound) { if (equivalentsCache.isEmpty()) { // most compounds are equivalent to themselves alone for (AminoAcidCompound c : aminoAcidCompoundCache.values()) { equivalentsCache.put(c, Collections.singleton(c)); } // ambiguous Asparagine or Aspartic acid addAmbiguousEquivalents("N", "D", "B"); // ambiguous Glutamine or Glutamic acid addAmbiguousEquivalents("E", "Q", "Z"); // ambiguous Leucine or Isoleucine addAmbiguousEquivalents("I", "L", "J"); // ambiguous gaps AminoAcidCompound gap1, gap2, gap3; Set<AminoAcidCompound> gaps = new HashSet<AminoAcidCompound>(); gaps.add(gap1 = aminoAcidCompoundCache.get("-")); gaps.add(gap2 = aminoAcidCompoundCache.get(".")); gaps.add(gap3 = aminoAcidCompoundCache.get("_")); equivalentsCache.put(gap1, gaps); equivalentsCache.put(gap2, gaps); equivalentsCache.put(gap3, gaps); // X is never equivalent, even to itself equivalentsCache.put(aminoAcidCompoundCache.get("X"), new HashSet<AminoAcidCompound>()); } return equivalentsCache.get(compound); }
/**
 * Checks whether {@code compoundTwo} is among the equivalents of {@code compoundOne}.
 *
 * @param compoundOne the compound whose equivalents are looked up
 * @param compoundTwo the compound searched for in that set
 * @return {@code true} if an equivalents set exists for {@code compoundOne} and it contains
 *         {@code compoundTwo}; {@code false} otherwise
 */
@Override
public boolean compoundsEquivalent(AminoAcidCompound compoundOne, AminoAcidCompound compoundTwo) {
    Set<AminoAcidCompound> candidates = getEquivalentCompounds(compoundOne);
    if (candidates == null) {
        // No entry for compoundOne: nothing can be equivalent to it.
        return false;
    }
    return candidates.contains(compoundTwo);
}
@Override public Set<AminoAcidCompound> getEquivalentCompounds(AminoAcidCompound compound) { if (equivalentsCache.isEmpty()) { // most compounds are equivalent to themselves alone for (AminoAcidCompound c : aminoAcidCompoundCache.values()) { equivalentsCache.put(c, Collections.singleton(c)); } // ambiguous Asparagine or Aspartic acid addAmbiguousEquivalents("N", "D", "B"); // ambiguous Glutamine or Glutamic acid addAmbiguousEquivalents("E", "Q", "Z"); // ambiguous Leucine or Isoleucine addAmbiguousEquivalents("I", "L", "J"); // ambiguous gaps AminoAcidCompound gap1, gap2, gap3; Set<AminoAcidCompound> gaps = new HashSet<AminoAcidCompound>(); gaps.add(gap1 = aminoAcidCompoundCache.get("-")); gaps.add(gap2 = aminoAcidCompoundCache.get(".")); gaps.add(gap3 = aminoAcidCompoundCache.get("_")); equivalentsCache.put(gap1, gaps); equivalentsCache.put(gap2, gaps); equivalentsCache.put(gap3, gaps); // X is never equivalent, even to itself equivalentsCache.put(aminoAcidCompoundCache.get("X"), new HashSet<AminoAcidCompound>()); } return equivalentsCache.get(compound); }
/**
 * A protein sequence where the storage of the sequence is somewhere else.
 * Could be loaded from a large Fasta file or via a Uniprot Proxy reader via
 * Uniprot ID.
 * <p>
 * Delegates to the two-argument constructor, passing the shared compound set
 * returned by {@code AminoAcidCompoundSet.getAminoAcidCompoundSet()}.
 *
 * @param proxyLoader the reader that supplies the sequence data
 */
public ProteinSequence(ProxySequenceReader<AminoAcidCompound> proxyLoader) {
    this(proxyLoader, AminoAcidCompoundSet.getAminoAcidCompoundSet());
}
/**
 * Tells whether two one-letter symbols score positively against each other
 * in {@code matrix}.
 *
 * @param c1 first one-letter symbol
 * @param c2 second one-letter symbol
 * @return {@code true} if the value {@code matrix} reports for the pair is greater than zero
 */
private boolean isSimilar(char c1, char c2) {
    AminoAcidCompoundSet compounds = AminoAcidCompoundSet.getAminoAcidCompoundSet();

    // Resolve both characters to compounds through the shared compound set.
    AminoAcidCompound first = compounds.getCompoundForString(String.valueOf(c1));
    AminoAcidCompound second = compounds.getCompoundForString(String.valueOf(c2));

    short score = matrix.getValue(first, second);
    if (score > 0) {
        return true;
    }
    return false;
}
@Override public double getExtinctionCoefficient(ProteinSequence sequence, boolean assumeCysReduced) { //Tyr => Y //Trp => W //Cys => C //E(Prot) = Numb(Tyr)*Ext(Tyr) + Numb(Trp)*Ext(Trp) + Numb(Cystine)*Ext(Cystine) //where (for proteins in water measured at 280 nm): Ext(Tyr) = 1490, Ext(Trp) = 5500, Ext(Cystine) = 125; AminoAcidCompoundSet aaSet = new AminoAcidCompoundSet(); Map<AminoAcidCompound, Integer> extinctAA2Count = this.getExtinctAACount(sequence); double eProt; if(!assumeCysReduced){ eProt = extinctAA2Count.get(aaSet.getCompoundForString("Y")) * Constraints.aa2ExtinctionCoefficient.get(aaSet.getCompoundForString("Y")) + extinctAA2Count.get(aaSet.getCompoundForString("W")) * Constraints.aa2ExtinctionCoefficient.get(aaSet.getCompoundForString("W")) + extinctAA2Count.get(aaSet.getCompoundForString("C")) * Constraints.aa2ExtinctionCoefficient.get(aaSet.getCompoundForString("C")); }else eProt = extinctAA2Count.get(aaSet.getCompoundForString("Y")) * Constraints.aa2ExtinctionCoefficient.get(aaSet.getCompoundForString("Y")) + extinctAA2Count.get(aaSet.getCompoundForString("W")) * Constraints.aa2ExtinctionCoefficient.get(aaSet.getCompoundForString("W")); return eProt; }
/**
 * Returns the amino acid compound set to use.
 *
 * @return {@code aminoAcidCompounds} when it is set, otherwise the shared set from
 *         {@code AminoAcidCompoundSet.getAminoAcidCompoundSet()}
 */
private CompoundSet<AminoAcidCompound> getAminoAcidCompounds() {
    if (aminoAcidCompounds == null) {
        // Nothing configured: fall back to the shared default set.
        return AminoAcidCompoundSet.getAminoAcidCompoundSet();
    }
    return aminoAcidCompounds;
}
private void initMatrix(String line) { String[] spl = line.split(" "); // trim off the final , character currentRows = spl[3].substring(0, spl[3].length()-1); currentCols = spl[6]; currentRowPos = -1; int nrRows = currentRows.length(); int nrCols = currentCols.length(); matrix = new short[nrRows][nrCols]; rows = new ArrayList<AminoAcidCompound>(); cols = new ArrayList<AminoAcidCompound>(); //System.out.println(">" + currentRows+"<"); AminoAcidCompoundSet compoundSet = AminoAcidCompoundSet.getAminoAcidCompoundSet(); for ( int i = 0 ; i < currentRows.length() ; i ++){ char c = currentRows.charAt(i); AminoAcidCompound aa = compoundSet.getCompoundForString(String.valueOf(c)); rows.add(aa); } for ( int i = 0 ; i < currentCols.length() ; i ++){ char c = currentRows.charAt(i); AminoAcidCompound aa = compoundSet.getCompoundForString(String.valueOf(c)); cols.add(aa); } currentMatrix.setScale(scale); }
private Map<AminoAcidCompound, Integer> getExtinctAACount(ProteinSequence sequence){ //Cys => C, Tyr => Y, Trp => W int numW = 0; int smallW = 0; double numC = 0; double smallC = 0; int numY = 0; int smallY = 0; for(char aa:sequence.getSequenceAsString().toCharArray()){ switch(aa){ case 'W': numW++; break; case 'w': smallW++; break; case 'C': numC += 0.5; break; case 'c': smallC += 0.5; break; case 'Y': numY++; break; case 'y': smallY++; break; } } AminoAcidCompoundSet aaSet = new AminoAcidCompoundSet(); Map<AminoAcidCompound, Integer> extinctAA2Count = new HashMap<AminoAcidCompound, Integer>(); //Ignore Case is always true extinctAA2Count.put(aaSet.getCompoundForString("W"), numW + smallW); extinctAA2Count.put(aaSet.getCompoundForString("C"), (int) (numC + smallC)); extinctAA2Count.put(aaSet.getCompoundForString("Y"), numY + smallY); return extinctAA2Count; }
/**
 * A protein sequence where the storage of the sequence is somewhere else.
 * Could be loaded from a large Fasta file or via a Uniprot Proxy reader via
 * Uniprot ID.
 * <p>
 * Delegates to the two-argument constructor, passing the shared compound set
 * returned by {@code AminoAcidCompoundSet.getAminoAcidCompoundSet()}.
 *
 * @param proxyLoader the reader that supplies the sequence data
 */
public ProteinSequence(ProxySequenceReader<AminoAcidCompound> proxyLoader) {
    this(proxyLoader, AminoAcidCompoundSet.getAminoAcidCompoundSet());
}
private void initMatrix(String line) { String[] spl = line.split(" "); // trim off the final , character currentRows = spl[3].substring(0, spl[3].length()-1); currentCols = spl[6]; currentRowPos = -1; int nrRows = currentRows.length(); int nrCols = currentCols.length(); matrix = new short[nrRows][nrCols]; rows = new ArrayList<AminoAcidCompound>(); cols = new ArrayList<AminoAcidCompound>(); //System.out.println(">" + currentRows+"<"); AminoAcidCompoundSet compoundSet = AminoAcidCompoundSet.getAminoAcidCompoundSet(); for ( int i = 0 ; i < currentRows.length() ; i ++){ char c = currentRows.charAt(i); AminoAcidCompound aa = compoundSet.getCompoundForString(String.valueOf(c)); rows.add(aa); } for ( int i = 0 ; i < currentCols.length() ; i ++){ char c = currentRows.charAt(i); AminoAcidCompound aa = compoundSet.getCompoundForString(String.valueOf(c)); cols.add(aa); } currentMatrix.setScale(scale); }
AminoAcidCompoundSet aaSet = new AminoAcidCompoundSet(); Map<AminoAcidCompound, Integer> chargedAA2Count = new HashMap<AminoAcidCompound, Integer>(); chargedAA2Count.put(aaSet.getCompoundForString("K"), numK); chargedAA2Count.put(aaSet.getCompoundForString("R"), numR); chargedAA2Count.put(aaSet.getCompoundForString("H"), numH); chargedAA2Count.put(aaSet.getCompoundForString("D"), numD); chargedAA2Count.put(aaSet.getCompoundForString("E"), numE); chargedAA2Count.put(aaSet.getCompoundForString("C"), numC); chargedAA2Count.put(aaSet.getCompoundForString("Y"), numY); return chargedAA2Count;
/**
 * Create a protein from a string.
 * <p>
 * Delegates to the two-argument constructor, passing the shared compound set
 * returned by {@code AminoAcidCompoundSet.getAminoAcidCompoundSet()}.
 *
 * @param seqString the sequence text to build the protein from
 * @throws CompoundNotFoundException propagated from the delegated constructor
 *         (presumably when a symbol in {@code seqString} is not in the compound
 *         set - confirm against that constructor)
 */
public ProteinSequence(String seqString) throws CompoundNotFoundException {
    this(seqString, AminoAcidCompoundSet.getAminoAcidCompoundSet());
}
Compound gapSymbol = AminoAcidCompoundSet.getAminoAcidCompoundSet().getCompoundForString("-");
AminoAcidCompoundSet aaSet = new AminoAcidCompoundSet(); AminoAcidCompound nTermCompound = aaSet.getCompoundForString(String.valueOf(nTerminalChar)); if(Constraints.aa2NTerminalPka.containsKey(nTermCompound)){ nTerminalCharge = this.getPosCharge(Constraints.aa2NTerminalPka.get(nTermCompound), ph); AminoAcidCompound cTermCompound = aaSet.getCompoundForString(String.valueOf(cTerminalChar)); if(Constraints.aa2CTerminalPka.containsKey(cTermCompound)){ cTerminalCharge = this.getNegCharge(Constraints.aa2CTerminalPka.get(cTermCompound), ph); double kCharge = chargedAA2Count.get(aaSet.getCompoundForString("K")) * this.getPosCharge(Constraints.aa2PKa.get(aaSet.getCompoundForString("K")), ph); double rCharge = chargedAA2Count.get(aaSet.getCompoundForString("R")) * this.getPosCharge(Constraints.aa2PKa.get(aaSet.getCompoundForString("R")), ph); double hCharge = chargedAA2Count.get(aaSet.getCompoundForString("H")) * this.getPosCharge(Constraints.aa2PKa.get(aaSet.getCompoundForString("H")), ph); double dCharge = chargedAA2Count.get(aaSet.getCompoundForString("D")) * this.getNegCharge(Constraints.aa2PKa.get(aaSet.getCompoundForString("D")), ph); double eCharge = chargedAA2Count.get(aaSet.getCompoundForString("E")) * this.getNegCharge(Constraints.aa2PKa.get(aaSet.getCompoundForString("E")), ph); double cCharge = chargedAA2Count.get(aaSet.getCompoundForString("C")) * this.getNegCharge(Constraints.aa2PKa.get(aaSet.getCompoundForString("C")), ph); double yCharge = chargedAA2Count.get(aaSet.getCompoundForString("Y")) * this.getNegCharge(Constraints.aa2PKa.get(aaSet.getCompoundForString("Y")), ph);