protected List<ProteinImpl> getProteinTranscripts( Protein protein, String cvXRefQualifier) { if ( protein == null ) { throw new NullPointerException( "The master protein must not be null." ); } if ( cvXRefQualifier == null ) { throw new NullPointerException( "The Cv Xref qualifier must not be null." ); } String ac = protein.getAc(); if ( ac == null ) { // This protein doesn't have an AC, it cannot have protein transcripts. if (log.isWarnEnabled()) log.warn("Cannot find protein transcripts for a protein without AC: "+protein.getShortLabel()); return Collections.EMPTY_LIST; } Query query = getEntityManager().createQuery("select prot from ProteinImpl prot inner join " + "prot.xrefs as xref where " + "xref.cvXrefQualifier.identifier = :transcriptParentMi " + "and xref.cvDatabase.identifier = :intactMi " + "and xref.primaryId = :masterAc"); query.setParameter("transcriptParentMi", cvXRefQualifier); query.setParameter("intactMi", CvDatabase.INTACT_MI_REF); query.setParameter("masterAc", ac); return query.getResultList(); }
/**
 * Collects the xrefs of a protein that match both a database and a qualifier identifier.
 * <p>
 * Xrefs without a database or without a qualifier are skipped instead of triggering a
 * {@code NullPointerException}.
 *
 * @param protein        the protein whose xrefs are scanned.
 * @param databaseMiRef  identifier the xref database must have.
 * @param qualifierMiRef identifier the xref qualifier must have.
 * @return a new, possibly empty, collection of the matching xrefs.
 */
private static Collection<InteractorXref> extractCrossReferencesFrom(Protein protein, String databaseMiRef, String qualifierMiRef) {
    Collection<InteractorXref> matches = new ArrayList<InteractorXref>();

    for (InteractorXref ref : protein.getXrefs()) {
        // An xref lacking a database or a qualifier can never match both identifiers.
        if (ref.getCvDatabase() == null || ref.getCvXrefQualifier() == null) {
            continue;
        }

        if (ref.getCvDatabase().getIdentifier().equals(databaseMiRef)
                && ref.getCvXrefQualifier().getIdentifier().equals(qualifierMiRef)) {
            matches.add(ref);
        }
    }

    return matches;
}
/**
 * Tells whether a protein carries an annotation whose topic short label equals
 * {@code CvTopic.NON_UNIPROT}.
 *
 * @param protein the protein whose annotations are inspected.
 * @return true as soon as one such annotation is found, false otherwise.
 */
public static boolean isNoUniprotUpdate(Protein protein) {
    boolean nonUniprot = false;

    for (Annotation candidate : protein.getAnnotations()) {
        if (CvTopic.NON_UNIPROT.equals(candidate.getCvTopic().getShortLabel())) {
            nonUniprot = true;
            break; // one match is enough
        }
    }

    return nonUniprot;
}
}
/**
 * Creates a protein chain attached to a master protein.
 * <p>
 * The chain is built with {@code createProtein(uniprotId, shortLabel)} and receives one xref
 * whose primary id is the master protein AC, whose qualifier is the chain-parent qualifier
 * ({@code CvXrefQualifier.CHAIN_PARENT_MI_REF}) and whose database is the IntAct database
 * ({@code CvDatabase.INTACT_MI_REF}).
 *
 * @param masterProt the master protein; it must already have an AC.
 * @param uniprotId  the uniprot id given to the chain.
 * @param shortLabel the short label given to the chain.
 * @return the newly created chain.
 * @throws IllegalArgumentException if the master protein has no AC.
 */
public Protein createProteinChain(Protein masterProt, String uniprotId, String shortLabel) {
    // Validate the precondition first so that nothing is created for an unusable master.
    if (masterProt.getAc() == null) {
        throw new IllegalArgumentException("Cannot create a chain if the master protein does not have an AC: "+masterProt.getShortLabel());
    }

    Protein chain = createProtein(uniprotId, shortLabel);

    CvXrefQualifier chainParent = createCvObject(CvXrefQualifier.class, CvXrefQualifier.CHAIN_PARENT_MI_REF, CvXrefQualifier.CHAIN_PARENT);
    CvDatabase intactDb = createCvObject(CvDatabase.class, CvDatabase.INTACT_MI_REF, CvDatabase.INTACT);

    InteractorXref chainParentXref = createXref(chain, masterProt.getAc(), chainParent, intactDb);
    chain.addXref(chainParentXref);

    return chain;
}
/**
 * Verifies that the CRC64 stored on a protein matches the CRC64 computed from its sequence.
 * <p>
 * A protein without sequence is not checked.
 *
 * @param protein the protein to check.
 * @return a collection holding one {@code MessageDefinition.PROTEIN_INCORRECT_CRC64} message
 *         when the two values differ, otherwise an empty collection.
 * @throws SanityRuleException declared by the signature; not thrown directly by this body.
 */
public Collection<GeneralMessage> check(Protein protein) throws SanityRuleException {
    Collection<GeneralMessage> report = new ArrayList<GeneralMessage>();

    String sequence = protein.getSequence();
    if (sequence == null) {
        // nothing to compute the checksum from
        return report;
    }

    boolean crcMatches = Crc64.getCrc64(sequence).equals(protein.getCrc64());
    if (!crcMatches) {
        report.add(new GeneralMessage(MessageDefinition.PROTEIN_INCORRECT_CRC64, protein));
    }

    return report;
}
}
/** * Get all interaction related to the given Protein. * * @param protein the protein of which we want the interactions. * * @return a Collection if Interaction. */ protected final List<Interaction> getInteractions(final Protein protein) { Collection<Component> components = protein.getActiveInstances(); int componentsSize = components.size(); List<Interaction> interactions = new ArrayList<Interaction>(componentsSize); out.println("Found " + componentsSize + " components for protein " + protein.getShortLabel() + ". Starting to get the interactions from those components."); for (Component component : components) { Interaction interaction = component.getInteraction(); if (!interactions.contains(interaction)) { interactions.add(interaction); } } return interactions; // return IntactContext.getCurrentInstance().getDataContext().getDaoFactory().getInteractionDao().getInteractionsByInteractorAc(protein.getAc()); }
if ( protein1.getBioSource().getTaxId().equals( proteinInteractor.getOrganism().getTaxId() ) ) { for ( Iterator iterator2 = protein1.getXrefs().iterator(); iterator2.hasNext() && ! xrefFound; ) case 1: protein = (Protein) proteins.iterator().next(); System.out.println( "Found 1 protein (" + protein.getShortLabel() + ") for Primary ID: " + proteinId + ", Database: " + db + " (IntAct)" ); break; Protein protein1 = (Protein) iterator.next(); message += protein1.getAc(); protein.setSequence( proteinInteractor.getSequence() ); protein.setCrc64( Crc64.getCrc64( proteinInteractor.getSequence() ) ); IntactContext.getCurrentInstance().getDataContext().getDaoFactory().getProteinDao().update( (ProteinImpl)protein ); System.out.println( "Created 1 protein (" + protein.getShortLabel() + ") for Primary ID: " + proteinId + ", Database: " + db + " (Not UniProt)" );
for ( Iterator iterator = proteins.iterator(); iterator.hasNext(); ) { Protein protein1 = (Protein) iterator.next(); BioSource biosource = protein1.getBioSource(); sb.append( biosource.getShortLabel() ).append( '(' ); sb.append( biosource.getTaxId() ).append( ')' ).append( ' ' ); try { if ( DEBUG ) { System.out.println( "search splice variant of master AC: " + protein.getAc() ); Collection spliceVariants = IntactContext.getCurrentInstance().getDataContext().getDaoFactory().getProteinDao().getByXrefLike(protein.getAc()); if ( sv.getShortLabel().startsWith( id.toLowerCase() ) ) { spliceVariant = sv; break; // exit the loop. for ( Iterator iterator = proteins.iterator(); iterator.hasNext(); ) { Protein protein1 = (Protein) iterator.next(); BioSource biosource = protein1.getBioSource(); sb.append( biosource.getShortLabel() ).append( '(' ); sb.append( biosource.getTaxId() ).append( ')' ).append( ' ' );
/**
 * Builds a protein with a uniprot identity xref, a gene name alias and a random peptide
 * sequence together with the matching CRC64.
 *
 * @param uniprotId  id used for the uniprot identity xref.
 * @param shortLabel short label of the protein.
 * @param geneName   name used for the gene name alias.
 * @param bioSource  biosource of the protein.
 * @param intType    interactor type of the protein.
 * @return the new protein.
 */
private Protein createProtein(String uniprotId, String shortLabel, String geneName, BioSource bioSource, CvInteractorType intType) {
    Protein created = new ProteinImpl(getInstitution(), bioSource, shortLabel, intType);

    created.addXref(createIdentityXrefUniprot(created, uniprotId));
    created.addAlias(createAliasGeneName(created, geneName));

    // The checksum is computed before either value is stored on the protein.
    final String peptide = randomPeptideSequence();
    final String checksum = Crc64.getCrc64(peptide);
    created.setSequence(peptide);
    created.setCrc64(checksum);

    return created;
}
public DuplicatedProteinEvent(ProteinUpdateProcess updateProcess, Protein duplicatedProtein, Protein originalProtein, String uniprotAc, boolean wasMergeSuccessful){ super(updateProcess, duplicatedProtein, uniprotAc); this.originalProtein = originalProtein != null ? originalProtein.getAc() : null; this.wasMergeSuccessful = wasMergeSuccessful; }
/**
 * Get the uniprot primary ID from Protein and Splice variant.
 * <p>
 * The id is the primary id of the first xref whose database equals {@code getUniprot()} and
 * whose qualifier equals {@code getIdentity()}. Results, including "not found" (null), are
 * cached by protein AC. A protein without AC is never cached, so that such proteins cannot
 * all share a single null cache key.
 *
 * @param protein the Protein for which we want the uniprot ID.
 *
 * @return the uniprot ID as a String or null if none is found (should not occur)
 */
public String getUniprotPrimaryAc(final Protein protein) {
    final String ac = protein.getAc();

    // containsKey (rather than a null test on get) because a cached "not found" is stored as null.
    if (ac != null && protAcToUniprotIdCache.containsKey(ac)) {
        return protAcToUniprotIdCache.get(ac);
    }

    String uniprotId = null;

    Collection<InteractorXref> xrefs = protein.getXrefs();
    for (InteractorXref xref : xrefs) {
        if (getUniprot().equals(xref.getCvDatabase()) && getIdentity().equals(xref.getCvXrefQualifier())) {
            uniprotId = xref.getPrimaryId();
            break;
        }
    }

    if (ac != null) {
        protAcToUniprotIdCache.put(ac, uniprotId);
    }

    return uniprotId;
}
}
BioSource2xmlFactory.getInstance( session ).createOrganism( session, element, protein.getBioSource() ); Text sequenceText = session.createTextNode( protein.getSequence() ); sequence.appendChild( sequenceText ); element.appendChild( sequence );
String proteinID) { String tremblEntry = SearchReplace.replace( TREMBL_TEMPLATE, PROTEIN_SEQUENCE_LENGTH, "" + protein.getSequence().length() ); tremblEntry = SearchReplace.replace( tremblEntry, PROTEIN_FULLNAME, protein.getFullName() ); tremblEntry = SearchReplace.replace( tremblEntry, NCBI_TAXID, protein.getBioSource().getTaxId() ); tremblEntry = SearchReplace.replace( tremblEntry, R_LINE_REFERENCE_LINE, referenceLine ); tremblEntry = SearchReplace.replace( tremblEntry, PROTEIN_AC, protein.getAc() ); String time = DATE_FORMATER.format( new Date( protein.getCreated().getTime() ) ); tremblEntry = SearchReplace.replace( tremblEntry, PROTEIN_CREATION_DATE, time.toUpperCase() ); tremblEntry = SearchReplace.replace( tremblEntry, PROTEIN_CRC64, Crc64.getCrc64( protein.getSequence() ) ); String formatedSequence = formatSequence( protein.getSequence() ); tremblEntry = SearchReplace.replace( tremblEntry, PROTEIN_SEQUENCE, formatedSequence );
if (!protein1.getBioSource().equals(protein2.getBioSource())) { buffer.append(' ').append("(xeno)"); buffer.append("IntAct=").append(protein1.getAc()).append(',').append(' ').append(protein2.getAc()).append(';');
protected ProteinImpl getProteinTranscriptsMasterProtein( Protein proteinTranscript, String cvXRefQualifier) { if ( proteinTranscript == null ) { throw new NullPointerException( "proteinTranscript must not be null." ); } if ( cvXRefQualifier == null ) { throw new NullPointerException( "The Cv Xref qualifier must not be null." ); } String masterProtAc = null; for (InteractorXref xref : proteinTranscript.getXrefs()) { if (xref.getCvXrefQualifier() != null && cvXRefQualifier.equals(xref.getCvXrefQualifier().getIdentifier())) { if (masterProtAc == null) { masterProtAc = xref.getPrimaryId(); } else { throw new IntactException("This protein transcript contains more than one "+cvXRefQualifier+" xrefs: "+proteinTranscript.getShortLabel() ); } } } // search protein by AC return getByAc(masterProtAc); }
/**
 * Collects the feature ranges of a protein that {@code isABadRange} rejects for the protein
 * sequence. Every range of every feature of every active instance (component) is tested.
 *
 * @param protein : the protein to check
 * @return a set of the protein feature ranges which are overlapping or out of bound
 */
public static Set<Range> getBadRanges(Protein protein) {
    Set<Range> offenders = new HashSet<Range>();

    for (Component instance : protein.getActiveInstances()) {
        for (Feature feature : instance.getFeatures()) {
            for (Range candidate : feature.getRanges()) {
                if (!isABadRange(candidate, protein.getSequence())) {
                    continue;
                }
                offenders.add(candidate);
            }
        }
    }

    return offenders;
}
for ( Iterator iterator = protein.getAliases().iterator(); iterator.hasNext() && geneName == null; ) { final Alias alias = (Alias) iterator.next(); geneName = protein.getShortLabel(); System.out.println( "NOTICE: protein " + protein.getShortLabel() + " does not have a gene name, we will use it's SPTR ID: " + geneName );
/**
 * Tells whether the given proteins refer to more than one distinct biosource.
 * Proteins without biosource are ignored.
 *
 * @param proteins the proteins to inspect; every element is cast to Protein.
 *
 * @return true if more than one distinct biosource found, else false.
 */
private static boolean hasMultipleBioSource( final Collection proteins ) {
    final Set distinctSources = new HashSet();

    final Iterator cursor = proteins.iterator();
    while ( cursor.hasNext() ) {
        final BioSource source = ( (Protein) cursor.next() ).getBioSource();
        if ( source != null ) {
            distinctSources.add( source );
        }
    }

    return distinctSources.size() > 1;
}
/** * Assess if a protein is a aplice variant on the basis of its shortlabel as we use the following format SPID-# and * if it has a isoform-parent cross reference. <br> Thought it doesn't mean we will find a master protein for it. * * @param protein the protein we are interrested in knowing if it is a splice variant. * * @return true if the name complies to the splice variant format. */ protected boolean isSpliceVariant(Protein protein) { // TODO check here is it has a master or not. if (protein.getShortLabel().indexOf("-") != -1) { // eg. P12345-2 if (getMasterAc(protein) != null) { return true; } } return false; }
sequence = protein.getSequence();