/**
 * Supplies the value exposed under the XML element named {@code sequence}.
 *
 * @return a newly created {@code Sequence} built from the results of
 *         {@code getSequence()} and {@code getMd5()}, in that order.
 */
@XmlElement(name = "sequence")
private Sequence getSequenceObject() {
    final Sequence wrapper = new Sequence(getSequence(), getMd5());
    return wrapper;
}
protected void addToMoleculeCollection(String sequence, String currentId, Set<NucleotideSequence> parsedMolecules) { sequence = WHITE_SPACE_PATTERN.matcher(sequence).replaceAll(""); NucleotideSequence thisMolecule = new NucleotideSequence(sequence); // Check if this sequence is already in the Set. If it is, retrieve it. boolean alreadyExists = false; for (NucleotideSequence existing : parsedMolecules) { if (existing.getMd5().equals(thisMolecule.getMd5())) { thisMolecule = existing; alreadyExists = true; break; } } // New sequence - add it to the collection. if (!alreadyExists) { parsedMolecules.add(thisMolecule); } // Add the identifier to the Protein object. (Being added to a Set, so no risk of duplicates) thisMolecule.addCrossReference(XrefParser.getNucleotideSequenceXref(currentId)); } }
newMd5s.add(newSequence.getMd5()); if (LOGGER.isDebugEnabled()) { LOGGER.debug("MD5 of new nucleotide sequence: " + newSequence.getMd5()); for (NucleotideSequence existingSequence : (List<NucleotideSequence>) query.getResultList()) { if (LOGGER.isDebugEnabled()) { LOGGER.debug("Found 1 existing nucleotide sequence with MD5: " + existingSequence.getMd5()); md5ToExistingSequence.put(existingSequence.getMd5(), existingSequence); count ++; if (md5ToExistingSequence.keySet().contains(candidate.getMd5())) { NucleotideSequence existingSequence = md5ToExistingSequence.get(candidate.getMd5()); boolean updateRequired = false; if (candidate.getCrossReferences() != null) { md5ToExistingSequence.put(candidate.getMd5(), candidate);
/**
 * Builds the GFF3 output for the user-provided nucleic acid sequence.
 * <p>
 * The feature uses the nucleotide identifier as its sequence id, ID and Name attributes,
 * spans positions 1 to the length of the sequence on the "+" strand, and carries the
 * sequence MD5 as an attribute.
 *
 * @param nucleotideSeq the nucleotide sequence to describe; must not be null
 * @return the result of {@code GFF3Feature.getGFF3FeatureLine()} for the built feature
 * @throws IllegalArgumentException if {@code nucleotideSeq} is null
 */
private List<String> getNucleicAcidLine(NucleotideSequence nucleotideSeq) {
    if (nucleotideSeq == null) {
        // Fail with a descriptive message instead of an anonymous NullPointerException below.
        throw new IllegalArgumentException("A null nucleotide sequence has been passed in.");
    }
    final String seqId = getNucleotideId();
    final int end = nucleotideSeq.getSequence().length();
    final GFF3Feature nucleicAcidFeature =
            new GFF3Feature(seqId, "provided_by_user", "nucleic_acid", 1, end, "+");
    // The identifier was already fetched into seqId; reuse it rather than calling the getter again.
    nucleicAcidFeature.addAttribute(GFF3Feature.ID_ATTR, seqId);
    nucleicAcidFeature.addAttribute(GFF3Feature.NAME_ATTR, seqId);
    nucleicAcidFeature.addAttribute(GFF3Feature.MD5_ATTR, nucleotideSeq.getMd5());
    return nucleicAcidFeature.getGFF3FeatureLine();
}
/**
 * Builds the GFF3 output for the user-provided nucleic acid sequence.
 * <p>
 * The feature uses the nucleotide identifier as its sequence id, ID and Name attributes,
 * spans positions 1 to the length of the sequence on the "+" strand, and carries the
 * sequence MD5 as an attribute.
 *
 * @param nucleotideSeq the nucleotide sequence to describe; must not be null
 * @return the result of {@code GFF3Feature.getGFF3FeatureLine()} for the built feature
 * @throws IllegalArgumentException if {@code nucleotideSeq} is null
 */
private List<String> getNucleicAcidLine(NucleotideSequence nucleotideSeq) {
    if (nucleotideSeq == null) {
        // Fail with a descriptive message instead of an anonymous NullPointerException below.
        throw new IllegalArgumentException("A null nucleotide sequence has been passed in.");
    }
    final String seqId = getNucleotideId();
    final int end = nucleotideSeq.getSequence().length();
    final GFF3Feature nucleicAcidFeature =
            new GFF3Feature(seqId, "provided_by_user", "nucleic_acid", 1, end, "+");
    // The identifier was already fetched into seqId; reuse it rather than calling the getter again.
    nucleicAcidFeature.addAttribute(GFF3Feature.ID_ATTR, seqId);
    nucleicAcidFeature.addAttribute(GFF3Feature.NAME_ATTR, seqId);
    nucleicAcidFeature.addAttribute(GFF3Feature.MD5_ATTR, nucleotideSeq.getMd5());
    return nucleicAcidFeature.getGFF3FeatureLine();
}
/**
 * Builds the GFF3 output for a single open reading frame.
 * <p>
 * The feature is placed on the nucleotide identifier with source "getorf" and type "ORF",
 * using the ORF's start, end and strand ("+" for the sense strand, "-" otherwise). Its
 * Target attribute is {@code "<proteinIdForGFF> 1 <proteinLength>"}. The MD5 attribute is
 * added only when the ORF has an associated nucleotide sequence.
 *
 * @param orf                 the open reading frame to describe; must not be null
 * @param proteinIdFromGetorf the value written as the feature's Name attribute
 * @param proteinIdForGFF     the protein identifier used in the Target attribute
 * @param proteinLength       the end coordinate used in the Target attribute
 * @return the result of {@code GFF3Feature.getGFF3FeatureLine()} for the built feature
 * @throws IllegalArgumentException if {@code orf} is null
 */
private List<String> getORFLine(OpenReadingFrame orf, String proteinIdFromGetorf, String proteinIdForGFF, int proteinLength) {
    if (orf == null) {
        throw new IllegalArgumentException("A null orf has been passed in.");
    }
    final String seqId = getNucleotideId();
    final String strand = NucleotideSequenceStrand.SENSE.equals(orf.getStrand()) ? "+" : "-";
    final String orfIdentifier = buildOrfIdentifier(orf);

    final GFF3Feature orfFeature =
            new GFF3Feature(seqId, "getorf", "ORF", orf.getStart(), orf.getEnd(), strand);
    orfFeature.addAttribute(GFF3Feature.ID_ATTR, orfIdentifier);
    orfFeature.addAttribute(GFF3Feature.NAME_ATTR, proteinIdFromGetorf);
    orfFeature.addAttribute(GFF3Feature.TARGET_ATTR, proteinIdForGFF + " 1 " + proteinLength);

    // Null-check the very reference that is dereferenced, not the result of a second getter call.
    final NucleotideSequence ntSeq = orf.getNucleotideSequence();
    if (ntSeq != null) {
        orfFeature.addAttribute(GFF3Feature.MD5_ATTR, ntSeq.getMd5());
    }
    return orfFeature.getGFF3FeatureLine();
}
/**
 * Builds the GFF3 output for a single open reading frame.
 * <p>
 * The feature is placed on the nucleotide identifier with source "getorf" and type "ORF",
 * using the ORF's start, end and strand ("+" for the sense strand, "-" otherwise). Its
 * Target attribute is {@code "<proteinIdForGFF> 1 <proteinLength>"}. The MD5 attribute is
 * added only when the ORF has an associated nucleotide sequence.
 *
 * @param orf                 the open reading frame to describe; must not be null
 * @param proteinIdFromGetorf the value written as the feature's Name attribute
 * @param proteinIdForGFF     the protein identifier used in the Target attribute
 * @param proteinLength       the end coordinate used in the Target attribute
 * @return the result of {@code GFF3Feature.getGFF3FeatureLine()} for the built feature
 * @throws IllegalArgumentException if {@code orf} is null
 */
private List<String> getORFLine(OpenReadingFrame orf, String proteinIdFromGetorf, String proteinIdForGFF, int proteinLength) {
    if (orf == null) {
        throw new IllegalArgumentException("A null orf has been passed in.");
    }
    final String seqId = getNucleotideId();
    final String strand = NucleotideSequenceStrand.SENSE.equals(orf.getStrand()) ? "+" : "-";
    final String orfIdentifier = buildOrfIdentifier(orf);

    final GFF3Feature orfFeature =
            new GFF3Feature(seqId, "getorf", "ORF", orf.getStart(), orf.getEnd(), strand);
    orfFeature.addAttribute(GFF3Feature.ID_ATTR, orfIdentifier);
    orfFeature.addAttribute(GFF3Feature.NAME_ATTR, proteinIdFromGetorf);
    orfFeature.addAttribute(GFF3Feature.TARGET_ATTR, proteinIdForGFF + " 1 " + proteinLength);

    // Null-check the very reference that is dereferenced, not the result of a second getter call.
    final NucleotideSequence ntSeq = orf.getNucleotideSequence();
    if (ntSeq != null) {
        orfFeature.addAttribute(GFF3Feature.MD5_ATTR, ntSeq.getMd5());
    }
    return orfFeature.getGFF3FeatureLine();
}