protected void addToMoleculeCollection(String sequence, final String currentId, final Set<Protein> parsedMolecules) { sequence = WHITE_SPACE_PATTERN.matcher(sequence).replaceAll(""); Protein thisProtein = new Protein(sequence); // Check if this sequence is already in the Set. If it is, retrieve it. boolean isMoleculeAdded = parsedMolecules.add(thisProtein); if (!isMoleculeAdded) { for (Protein existing : parsedMolecules) { if (existing.getMd5().equals(thisProtein.getMd5())) { thisProtein = existing; break; } } } // Add the Xref to the Protein object. (Being added to a Set, so no risk of duplicates) thisProtein.addCrossReference(XrefParser.getProteinXref(currentId)); } }
/**
 * Supplies the value mapped to the "sequence" XML element.
 *
 * @return a new Sequence built from this object's sequence and MD5 values
 */
@XmlElement(name = "sequence")
private Sequence getSequenceObject() {
    final Sequence sequenceElement = new Sequence(this.getSequence(), this.getMd5());
    return sequenceElement;
}
final Map<String, Protein> md5ToPrecalcProtein = new HashMap<>(localPrecalculatedProteins.size()); for (Protein precalc : localPrecalculatedProteins) { md5ToPrecalcProtein.put(precalc.getMd5(), precalc); if (md5ToPrecalcProtein.keySet().contains(protein.getMd5())) { precalculatedProteins.add(md5ToPrecalcProtein.get(protein.getMd5())); } else { addProteinToBatch(protein);
LOGGER.trace("Matching protein count: " + matchingProteins.size()); for (Protein protein : matchingProteins) { LOGGER.trace("Protein ID: " + protein.getId() + " MD5: " + protein.getMd5()); LOGGER.trace("Has " + protein.getMatches().size() + " matches"); for (ProteinXref xref : protein.getCrossReferences()) {
newMd5s.add(newProtein.getMd5()); if (LOGGER.isDebugEnabled()) { LOGGER.debug("MD5 of new protein: " + newProtein.getMd5()); for (Protein existingProtein : (List<Protein>) query.getResultList()) { if (LOGGER.isDebugEnabled()) { LOGGER.debug("Found 1 existing protein with MD5: " + existingProtein.getMd5()); md5ToExistingProtein.put(existingProtein.getMd5(), existingProtein); if (md5ToExistingProtein.keySet().contains(candidate.getMd5())) { Protein existingProtein = md5ToExistingProtein.get(candidate.getMd5()); boolean updateRequired = false; if (candidate.getCrossReferences() != null) { LOGGER.trace("Merging protein with new Xrefs: " + existingProtein.getMd5()); LOGGER.trace("Saving new protein: " + candidate.getMd5()); md5ToExistingProtein.put(candidate.getMd5(), candidate);
/**
 * Writes out a Protein object to a GFF version 3 file.
 * <p>
 * For each accession returned by getProteinAccessions, and only when the protein
 * has at least one match, the accession is converted with getValidGFF3SeqId, the
 * sequence-region part is written and the matches are processed.
 *
 * @param protein containing matches to be written out
 * @return always 0 in this implementation (no row count is accumulated)
 * @throws java.io.IOException in the event of I/O problem writing out the file.
 */
public int write(Protein protein) throws IOException {
    final int sequenceLength = protein.getSequenceLength();
    final String md5 = protein.getMd5();
    final String date = dmyFormat.format(new Date());
    final Set<Match> matches = protein.getMatches();
    // NOTE(review): this is never assigned a value, so processMatches always receives null
    // as its protein ID argument - confirm whether the GFF3-valid accession was intended.
    final String proteinIdForGFF = null;

    for (final String accession : getProteinAccessions(protein)) {
        // Proteins without matches produce no output at all.
        if (matches.isEmpty()) {
            continue;
        }
        final String gffSeqId = getValidGFF3SeqId(accession);
        writeSequenceRegionPart(protein, sequenceLength, md5, gffSeqId);
        processMatches(matches, proteinIdForGFF, date, protein, getNucleotideId());
    }
    return 0;
}
/**
 * Writes out a Protein object to a GFF version 3 file.
 * <p>
 * For each accession returned by getProteinAccessions, and only when the protein
 * has at least one match, the accession is converted with getValidGFF3SeqId, the
 * sequence-region part is written and the matches are processed.
 *
 * @param protein containing matches to be written out
 * @return always 0 in this implementation (no row count is accumulated)
 * @throws java.io.IOException in the event of I/O problem writing out the file.
 */
public int write(Protein protein) throws IOException {
    final int sequenceLength = protein.getSequenceLength();
    final String md5 = protein.getMd5();
    final String date = dmyFormat.format(new Date());
    final Set<Match> matches = protein.getMatches();
    // NOTE(review): this is never assigned a value, so processMatches always receives null
    // as its protein ID argument - confirm whether the GFF3-valid accession was intended.
    final String proteinIdForGFF = null;

    for (final String accession : getProteinAccessions(protein)) {
        // Proteins without matches produce no output at all.
        if (matches.isEmpty()) {
            continue;
        }
        final String gffSeqId = getValidGFF3SeqId(accession);
        writeSequenceRegionPart(protein, sequenceLength, md5, gffSeqId);
        processMatches(matches, proteinIdForGFF, date, protein, getNucleotideId());
    }
    return 0;
}
List<String> proteinAcs = getProteinAccessions(protein); int length = protein.getSequenceLength(); String md5 = protein.getMd5(); String date = dmyFormat.format(new Date());
List<String> proteinAcs = getProteinAccessions(protein); int length = protein.getSequenceLength(); String md5 = protein.getMd5(); String date = dmyFormat.format(new Date());
List<String> proteinAcs = getProteinAccessions(protein); int length = protein.getSequenceLength(); String md5 = protein.getMd5(); String date = dmyFormat.format(new Date());
return null; final String upperMD5 = protein.getMd5().toUpperCase();
Protein protein = proteinIdToProteinMap.get(rp.getProteinIdentifier()); if (LOGGER.isDebugEnabled()) { LOGGER.debug("persist protein: " + protein.getId() + " md5:" + protein.getMd5());
/** * Writes out all protein matches for the specified protein (GFF formatted). * * @param protein containing matches to be written out * @return the number of rows printed (i.e. the number of Locations on Matches). * @throws java.io.IOException in the event of I/O problem writing out the file. */ public int write(Protein protein) throws IOException { List<String> proteinIdsForGFF = getProteinAccessions(protein); int sequenceLength = protein.getSequenceLength(); String md5 = protein.getMd5(); String date = dmyFormat.format(new Date()); Set<Match> matches = protein.getMatches(); //Write sequence region information for (String proteinIdForGFF: proteinIdsForGFF) { if (matches.size() > 0) { //Check if protein accessions are GFF3 valid proteinIdForGFF = ProteinMatchesGFFResultWriter.getValidGFF3SeqId(proteinIdForGFF); //Write sequence-region super.gffWriter.write("##sequence-region " + proteinIdForGFF + " 1 " + sequenceLength); if (writeFullGFF) { writeReferenceLine(proteinIdForGFF, sequenceLength, md5); addFASTASeqToMap(proteinIdForGFF, protein.getSequence()); } processMatches(matches, proteinIdForGFF, date, protein, proteinIdForGFF, writeFullGFF); }//end match size check } return 0; }
List<String> proteinAcs = getProteinAccessions(protein); int length = protein.getSequenceLength(); String md5 = protein.getMd5(); String date = dmyFormat.format(new Date());
/** * Writes out all protein matches for the specified protein (GFF formatted). * * @param protein containing matches to be written out * @return the number of rows printed (i.e. the number of Locations on Matches). * @throws java.io.IOException in the event of I/O problem writing out the file. */ public int write(Protein protein) throws IOException { List<String> proteinIdsForGFF = getProteinAccessions(protein); int sequenceLength = protein.getSequenceLength(); String md5 = protein.getMd5(); String date = dmyFormat.format(new Date()); Set<Match> matches = protein.getMatches(); //Write sequence region information for (String proteinIdForGFF: proteinIdsForGFF) { if (matches.size() > 0) { //Check if protein accessions are GFF3 valid proteinIdForGFF = ProteinMatchesGFFResultWriter.getValidGFF3SeqId(proteinIdForGFF); //Write sequence-region super.gffWriter.write("##sequence-region " + proteinIdForGFF + " 1 " + sequenceLength); if (writeFullGFF) { writeReferenceLine(proteinIdForGFF, sequenceLength, md5); addFASTASeqToMap(proteinIdForGFF, protein.getSequence()); } processMatches(matches, proteinIdForGFF, date, protein, proteinIdForGFF, writeFullGFF); }//end match size check } return 0; }
Protein protein = orf.getProtein(); int sequenceLength = protein.getSequenceLength(); String md5 = protein.getMd5(); String date = dmyFormat.format(new Date()); Set<Match> matches = protein.getMatches(); processMatches(protein.getMatches(), proteinIdForGFF, date, protein, getNucleotideId()); } else { throw new IllegalStateException("Cannot find the ORF object that maps to protein with MD5: " + protein.getMd5());
Protein protein = orf.getProtein(); int sequenceLength = protein.getSequenceLength(); String md5 = protein.getMd5(); String date = dmyFormat.format(new Date()); Set<Match> matches = protein.getMatches(); processMatches(protein.getMatches(), proteinIdForGFF, date, protein, getNucleotideId()); } else { throw new IllegalStateException("Cannot find the ORF object that maps to protein with PK / MD5: " + protein.getId() + " / " + protein.getMd5());
throw new IllegalStateException("Cannot find the ORF object that maps to protein with MD5: " + protein.getMd5());
throw new IllegalStateException("Cannot find the ORF object that maps to protein with PK / MD5: " + protein.getId() + " / " + protein.getMd5());