/**
 * Builds the {@code Sequence} object that is mapped to the {@code sequence} XML element.
 *
 * @return a new {@code Sequence} constructed from {@code getSequence()} and {@code getMd5()}
 */
@XmlElement(name = "sequence")
private Sequence getSequenceObject() {
    final Sequence wrapped = new Sequence(getSequence(), getMd5());
    return wrapped;
}
protected void addToMoleculeCollection(String sequence, String currentId, Set<NucleotideSequence> parsedMolecules) { sequence = WHITE_SPACE_PATTERN.matcher(sequence).replaceAll(""); NucleotideSequence thisMolecule = new NucleotideSequence(sequence); // Check if this sequence is already in the Set. If it is, retrieve it. boolean alreadyExists = false; for (NucleotideSequence existing : parsedMolecules) { if (existing.getMd5().equals(thisMolecule.getMd5())) { thisMolecule = existing; alreadyExists = true; break; } } // New sequence - add it to the collection. if (!alreadyExists) { parsedMolecules.add(thisMolecule); } // Add the identifier to the Protein object. (Being added to a Set, so no risk of duplicates) thisMolecule.addCrossReference(XrefParser.getNucleotideSequenceXref(currentId)); } }
/**
 * Passes every open reading frame in the supplied set to {@code addOpenReadingFrame}.
 *
 * @param orfs the open reading frames to add; must not be {@code null}
 * @throws IllegalArgumentException if {@code orfs} is {@code null}
 */
void setOpenReadingFrames(Set<OpenReadingFrame> orfs) {
    if (null != orfs) {
        for (final OpenReadingFrame openReadingFrame : orfs) {
            addOpenReadingFrame(openReadingFrame);
        }
        return;
    }
    throw new IllegalArgumentException("'orfs' must not be null");
}
/**
 * Queues a nucleotide sequence for insertion and persists the queue once it grows past the batch size.
 * <p>
 * A {@code null} or empty {@code sequence} is ignored. Otherwise a {@code NucleotideSequence} is
 * created, each supplied cross reference is parsed with {@code XrefParser.getNucleotideSequenceXref}
 * and attached to it, and the sequence is appended to {@code sequencesAwaitingInsertion}.
 * {@code persistBatch()} is called when the number of queued sequences exceeds
 * {@code sequenceInsertBatchSize}.
 *
 * @param sequence        the nucleotide sequence text; {@code null} or empty input is skipped
 * @param analysisJobMap  not read by this method
 * @param crossReferences identifiers to attach as cross references; an error is logged when
 *                        none are supplied, but the sequence is still queued
 */
public void store(String sequence, Map<String, SignatureLibraryRelease> analysisJobMap, String... crossReferences) {
    if (sequence == null || sequence.isEmpty()) {
        return;
    }

    final NucleotideSequence nucleotideSequence = new NucleotideSequence(sequence);

    // A varargs call without trailing arguments passes an empty array rather than null,
    // so both cases have to be treated as "no xrefs supplied".
    if (crossReferences == null || crossReferences.length == 0) {
        LOGGER.error("Have a nucleotide sequence with no associated Xrefs. There must be at least one xref associated with the nucleotide sequence");
    } else {
        for (String crossReference : crossReferences) {
            NucleotideSequenceXref xref = XrefParser.getNucleotideSequenceXref(crossReference);
            nucleotideSequence.addCrossReference(xref);
        }
    }

    sequencesAwaitingInsertion.add(nucleotideSequence);
    if (sequencesAwaitingInsertion.size() > sequenceInsertBatchSize) {
        persistBatch();
    }
}
newMd5s.add(newSequence.getMd5()); if (LOGGER.isDebugEnabled()) { LOGGER.debug("MD5 of new nucleotide sequence: " + newSequence.getMd5()); for (NucleotideSequence existingSequence : (List<NucleotideSequence>) query.getResultList()) { if (LOGGER.isDebugEnabled()) { LOGGER.debug("Found 1 existing nucleotide sequence with MD5: " + existingSequence.getMd5()); md5ToExistingSequence.put(existingSequence.getMd5(), existingSequence); count ++; if (md5ToExistingSequence.keySet().contains(candidate.getMd5())) { NucleotideSequence existingSequence = md5ToExistingSequence.get(candidate.getMd5()); boolean updateRequired = false; if (candidate.getCrossReferences() != null) { for (NucleotideSequenceXref xref : candidate.getCrossReferences()) { if (!existingSequence.getCrossReferences().contains(xref)) { existingSequence.addCrossReference(xref); updateRequired = true; md5ToExistingSequence.put(candidate.getMd5(), candidate);
for (OpenReadingFrame orf : nucleotideSequence.getOpenReadingFrames()) { Protein protein = orf.getProtein(); int sequenceLength = protein.getSequenceLength(); for (final NucleotideSequenceXref nucleotideSequenceXref : nucleotideSequence.getCrossReferences()) { String nucleotideSequenceXrefId = nucleotideSequenceXref.getIdentifier(); if (concatenatedNucSeqIdentifiersStr.length() > 0) { setNucleotideId(concatenatedNucSeqIdentifiersStr); super.gffWriter.write("##sequence-region " + concatenatedNucSeqIdentifiersStr + " 1 " + nucleotideSequence.getSequence().length()); super.gffWriter.write(getNucleicAcidLine(nucleotideSequence));
final StringBuilder concatenatedNucSeqIdentifiers = new StringBuilder(); for (final NucleotideSequenceXref nucleotideSequenceXref : nucleotideSequence.getCrossReferences()) { String nucleotideSequenceXrefId = nucleotideSequenceXref.getIdentifier(); if (concatenatedNucSeqIdentifiersStr.length() > 0) { setNucleotideId(concatenatedNucSeqIdentifiersStr); super.gffWriter.write("##sequence-region " + concatenatedNucSeqIdentifiersStr + " 1 " + nucleotideSequence.getSequence().length()); super.gffWriter.write(getNucleicAcidLine(nucleotideSequence));
/**
 * Builds the GFF3 feature line(s) describing an open reading frame.
 * <p>
 * The feature uses the current nucleotide id as sequence id, {@code "getorf"} as source and
 * {@code "ORF"} as type. The strand column is {@code "+"} when the ORF strand equals
 * {@code NucleotideSequenceStrand.SENSE} and {@code "-"} otherwise. ID, Name and Target
 * attributes are always added; the MD5 attribute is added only when the ORF has a
 * nucleotide sequence.
 *
 * @param orf                 the open reading frame to describe; must not be {@code null}
 * @param proteinIdFromGetorf value written to the Name attribute
 * @param proteinIdForGFF     protein identifier written at the start of the Target attribute
 * @param proteinLength       end coordinate written in the Target attribute (start is 1)
 * @return the lines produced by {@code GFF3Feature.getGFF3FeatureLine()}
 * @throws IllegalArgumentException if {@code orf} is {@code null}
 */
private List<String> getORFLine(OpenReadingFrame orf, String proteinIdFromGetorf, String proteinIdForGFF, int proteinLength) {
    if (orf == null) {
        throw new IllegalArgumentException("A null orf has been passed in.");
    }

    final String seqId = getNucleotideId();
    final String strand = (NucleotideSequenceStrand.SENSE.equals(orf.getStrand()) ? "+" : "-");
    final String orfIdentifier = buildOrfIdentifier(orf);

    final GFF3Feature orfFeature = new GFF3Feature(seqId, "getorf", "ORF", orf.getStart(), orf.getEnd(), strand);
    orfFeature.addAttribute(GFF3Feature.ID_ATTR, orfIdentifier);
    orfFeature.addAttribute(GFF3Feature.NAME_ATTR, proteinIdFromGetorf);
    orfFeature.addAttribute(GFF3Feature.TARGET_ATTR, proteinIdForGFF + " 1" + " " + proteinLength);

    // Null-check the very reference that is dereferenced below instead of fetching it a second time.
    final NucleotideSequence ntSeq = orf.getNucleotideSequence();
    if (ntSeq != null) {
        orfFeature.addAttribute(GFF3Feature.MD5_ATTR, ntSeq.getMd5());
    }

    return orfFeature.getGFF3FeatureLine();
}
List<NucleotideSequence> seqs= nucleotideSequenceDAO.retrieveAll(); for (NucleotideSequence ns: seqs) { Set<NucleotideSequenceXref> nsXrefs = ns.getCrossReferences() ; for (NucleotideSequenceXref nsXref: nsXrefs) { LOGGER.debug("Nucleotide xref identifier: " + nsXref.getIdentifier());
/**
 * Lowers the supplied "bottom" id to the smallest id found in {@code newSequences}.
 *
 * @param bottomNucleotideSequenceId the current lowest id, or {@code null} if none is known yet
 * @return the smaller of the supplied id and the ids in {@code newSequences}; the supplied
 *         value unchanged when {@code newSequences} is empty
 */
public Long updateBottomNucleotideSequenceId(Long bottomNucleotideSequenceId) {
    Long lowest = bottomNucleotideSequenceId;
    for (final NucleotideSequence candidate : newSequences) {
        // No value yet, or the candidate's id is below the current minimum.
        final boolean replaces = lowest == null || lowest > candidate.getId();
        if (replaces) {
            lowest = candidate.getId();
        }
    }
    return lowest;
}
for (OpenReadingFrame orf : nucleotideSequence.getOpenReadingFrames()) { Protein protein = orf.getProtein(); int sequenceLength = protein.getSequenceLength(); for (final NucleotideSequenceXref nucleotideSequenceXref : nucleotideSequence.getCrossReferences()) { String nucleotideSequenceXrefId = nucleotideSequenceXref.getIdentifier(); if (concatenatedNucSeqIdentifiersStr.length() > 0) { setNucleotideId(concatenatedNucSeqIdentifiersStr); super.gffWriter.write("##sequence-region " + concatenatedNucSeqIdentifiersStr + " 1 " + nucleotideSequence.getSequence().length()); super.gffWriter.write(getNucleicAcidLine(nucleotideSequence));
final StringBuilder concatenatedNucSeqIdentifiers = new StringBuilder(); for (final NucleotideSequenceXref nucleotideSequenceXref : nucleotideSequence.getCrossReferences()) { String nucleotideSequenceXrefId = nucleotideSequenceXref.getIdentifier(); if (concatenatedNucSeqIdentifiersStr.length() > 0) { setNucleotideId(concatenatedNucSeqIdentifiersStr); super.gffWriter.write("##sequence-region " + concatenatedNucSeqIdentifiersStr + " 1 " + nucleotideSequence.getSequence().length()); super.gffWriter.write(getNucleicAcidLine(nucleotideSequence));
/**
 * Produces the GFF3 line(s) for a single open reading frame.
 * <p>
 * Source is {@code "getorf"}, type is {@code "ORF"}, and the sequence id comes from
 * {@code getNucleotideId()}. ID, Name and Target attributes are always written; the MD5
 * attribute is written only if the ORF reports a nucleotide sequence.
 *
 * @param orf                 open reading frame to render; must not be {@code null}
 * @param proteinIdFromGetorf value of the Name attribute
 * @param proteinIdForGFF     identifier placed first in the Target attribute
 * @param proteinLength       last number in the Target attribute (the first is always 1)
 * @return whatever {@code GFF3Feature.getGFF3FeatureLine()} returns for the built feature
 * @throws IllegalArgumentException if {@code orf} is {@code null}
 */
private List<String> getORFLine(OpenReadingFrame orf, String proteinIdFromGetorf, String proteinIdForGFF, int proteinLength) {
    if (null == orf) {
        throw new IllegalArgumentException("A null orf has been passed in.");
    }

    final String sequenceIdentifier = getNucleotideId();

    // Anything other than SENSE is reported on the minus strand.
    final String strandSymbol;
    if (NucleotideSequenceStrand.SENSE.equals(orf.getStrand())) {
        strandSymbol = "+";
    } else {
        strandSymbol = "-";
    }

    final String featureId = buildOrfIdentifier(orf);
    final GFF3Feature feature =
            new GFF3Feature(sequenceIdentifier, "getorf", "ORF", orf.getStart(), orf.getEnd(), strandSymbol);

    final String target = proteinIdForGFF + " 1" + " " + proteinLength;
    feature.addAttribute(GFF3Feature.ID_ATTR, featureId);
    feature.addAttribute(GFF3Feature.NAME_ATTR, proteinIdFromGetorf);
    feature.addAttribute(GFF3Feature.TARGET_ATTR, target);

    final NucleotideSequence parentSequence = orf.getNucleotideSequence();
    if (orf.getNucleotideSequence() != null) {
        feature.addAttribute(GFF3Feature.MD5_ATTR, parentSequence.getMd5());
    }

    return feature.getGFF3FeatureLine();
}
/**
 * Raises the supplied "top" id to the largest id found in {@code newSequences}.
 *
 * @param topNucleotideSequenceId the current highest id, or {@code null} if none is known yet
 * @return the larger of the supplied id and the ids in {@code newSequences}; the supplied
 *         value unchanged when {@code newSequences} is empty
 */
public Long updateTopNucleotideSequenceId(Long topNucleotideSequenceId) {
    Long highest = topNucleotideSequenceId;
    for (final NucleotideSequence candidate : newSequences) {
        // Keep the current maximum when it is already at least as large as this id.
        if (highest != null && highest >= candidate.getId()) {
            continue;
        }
        highest = candidate.getId();
    }
    return highest;
}
}
/**
 * Builds the GFF3 line(s) for the nucleic acid feature covering a whole nucleotide sequence.
 * <p>
 * The feature spans position 1 to the length of the sequence on the {@code "+"} strand, with
 * source {@code "provided_by_user"} and type {@code "nucleic_acid"}. The value of
 * {@code getNucleotideId()} is used as sequence id and for both the ID and Name attributes;
 * the sequence MD5 is written as the MD5 attribute.
 *
 * @param nucleotideSeq the nucleotide sequence to describe
 * @return the lines produced by {@code GFF3Feature.getGFF3FeatureLine()}
 */
private List<String> getNucleicAcidLine(NucleotideSequence nucleotideSeq) {
    final GFF3Feature feature = new GFF3Feature(
            getNucleotideId(),
            "provided_by_user",
            "nucleic_acid",
            1,
            nucleotideSeq.getSequence().length(),
            "+");

    feature.addAttribute(GFF3Feature.ID_ATTR, getNucleotideId());
    feature.addAttribute(GFF3Feature.NAME_ATTR, getNucleotideId());
    feature.addAttribute(GFF3Feature.MD5_ATTR, nucleotideSeq.getMd5());

    return feature.getGFF3FeatureLine();
}
Long sequenceId = xref.getNucleotideSequence().getId(); final Query query = entityManager.createQuery(
/**
 * Passes every element of the supplied set to {@code addOpenReadingFrame}.
 * <p>
 * NOTE(review): the method name refers to cross references, yet the parameter is a
 * {@code Set<OpenReadingFrame>} and each element goes to {@code addOpenReadingFrame} -
 * confirm that this is intended.
 *
 * @param orfs the open reading frames to add; must not be {@code null}
 * @throws IllegalArgumentException if {@code orfs} is {@code null}
 */
void setCrossReferences(Set<OpenReadingFrame> orfs) {
    final boolean missing = (orfs == null);
    if (missing) {
        throw new IllegalArgumentException("'orfs' must not be null");
    }

    for (final OpenReadingFrame frame : orfs) {
        addOpenReadingFrame(frame);
    }
}
/**
 * Creates the GFF3 representation of a complete nucleotide sequence as a
 * {@code "nucleic_acid"} feature.
 * <p>
 * Coordinates run from 1 to the sequence length on the {@code "+"} strand and the source
 * column is {@code "provided_by_user"}. ID and Name are both set from
 * {@code getNucleotideId()}, and the MD5 attribute from the sequence's MD5.
 *
 * @param nucleotideSeq the sequence for which the feature is generated
 * @return the result of {@code GFF3Feature.getGFF3FeatureLine()} for that feature
 */
private List<String> getNucleicAcidLine(NucleotideSequence nucleotideSeq) {
    final String sequenceIdentifier = getNucleotideId();
    final int lastPosition = nucleotideSeq.getSequence().length();

    final GFF3Feature wholeSequenceFeature =
            new GFF3Feature(sequenceIdentifier, "provided_by_user", "nucleic_acid", 1, lastPosition, "+");

    // ID and Name both carry the nucleotide id.
    wholeSequenceFeature.addAttribute(GFF3Feature.ID_ATTR, getNucleotideId());
    wholeSequenceFeature.addAttribute(GFF3Feature.NAME_ATTR, getNucleotideId());
    wholeSequenceFeature.addAttribute(GFF3Feature.MD5_ATTR, nucleotideSeq.getMd5());

    final List<String> lines = wholeSequenceFeature.getGFF3FeatureLine();
    return lines;
}