/**
 * Writes a single line describing the given protein to {@code writer} and
 * records the protein's ID in {@code writtenIds}.
 * <p>
 * The emitted line has the shape
 * <code>ID&lt;tab&gt;Protein begin end&lt;tab&gt;covered text&lt;newline&gt;</code>,
 * where the covered text is cut out of {@code documentText} using the
 * protein's begin and end offsets.
 * <p>
 * NOTE(review): the line is written even if the ID was already recorded in
 * {@code writtenIds}; only the bookkeeping is guarded. Confirm that callers
 * are expected to check for duplicates themselves.
 *
 * @param protein the gene/protein annotation to write
 * @throws IOException if the underlying writer fails
 */
public void writeProtein(Gene protein) throws IOException {
    final String proteinId = protein.getId();
    final boolean alreadyRecorded = writtenIds.contains(proteinId);
    if (!alreadyRecorded) {
        writtenIds.add(proteinId);
    }

    final int begin = protein.getBegin();
    final int end = protein.getEnd();
    final String coveredText = documentText.substring(begin, end);

    // Assemble the complete line before touching the writer so that nothing
    // is emitted when the offsets are invalid for the document text.
    final StringBuilder line = new StringBuilder();
    line.append(proteinId)
            .append("\tProtein ")
            .append(begin)
            .append(" ")
            .append(end)
            .append("\t")
            .append(coveredText)
            .append("\n");
    writer.write(line.toString());
}
/**
 * Creates a Gene in the given JCas and sets its begin and end offsets.
 * After the offsets are set, {@code readObject()} is invoked; what that
 * hook does is not visible from this constructor.
 *
 * @generated
 * @param jcas JCas to which this Feature Structure belongs
 * @param begin offset to the begin spot in the SofA
 * @param end offset to the end spot in the SofA
 */
public Gene(JCas jcas, int begin, int end) {
    super(jcas);
    this.setBegin(begin);
    this.setEnd(end);
    this.readObject();
}
/** * Assigns an ID of the form <tt>Ti</tt> to each gene in the CAS, <tt>i</tt> * being an enumeration number beginning at 0. * * @param aJCas * @return */ private Map<String, Gene> enumerateProteins(JCas aJCas) { int i = 0; // just enumerate all genes Map<String, Gene> proteins = new HashMap<>(); FSIterator<Annotation> geneIt = aJCas.getAnnotationIndex(Gene.type).iterator(); // lastEnd holds the end offset of the gene in the prior iteration; we // use it to avoid overlapping genes. Those would most likely be an // error and cause errors in BioSem int lastEnd = 0; while (geneIt.hasNext()) { Gene gene = (Gene) geneIt.next(); if (gene.getBegin() < lastEnd) continue; String id = gene.getId(); // if (StringUtils.isBlank(id)) id = "T" + i++; gene.setId(id); proteins.put(id, gene); lastEnd = gene.getEnd(); } log.debug("Got {} non-overlapping genes/proteins in the document.", proteins.size()); return proteins; }
/**
 * Builds one text line per entry of the given map. The protein lines have
 * to match the Shared Task 2011 format:<br/>
 * <code>
 * ID&lt;tab&gt;Entity-Type[Protein]&lt;tab&gt;start&lt;tab&gt;end&lt;tab&gt;Mention name
 * </code> <br/>
 * Example: <samp> T3 Protein 166 174 TGF-beta </samp>
 * <p>
 * Lines are produced in the iteration order of the map. An entry whose line
 * cannot be built is logged together with the causing exception and left
 * out of the result; the remaining entries are still processed.
 *
 * @param proteins the genes/proteins to render, keyed by the ID that is
 *                 written into the first column
 * @return the protein lines, without trailing line breaks
 */
private List<String> getProteinLines(Map<String, Gene> proteins) {
    List<String> proteinLines = new ArrayList<>(proteins.size());
    for (Entry<String, Gene> proteinEntry : proteins.entrySet()) {
        String id = proteinEntry.getKey();
        Gene gene = proteinEntry.getValue();
        try {
            proteinLines.add(
                    id + "\tProtein\t" + gene.getBegin() + "\t" + gene.getEnd() + "\t" + gene.getCoveredText());
        } catch (Exception e) {
            // Deliberately best-effort: skip this protein, but hand the cause
            // to the logger instead of printing it to stderr.
            log.error("Failed to process Protein with its values.", e);
        }
    }
    return proteinLines;
}
protein.setSpecificType("protein"); uimaArg = new ArgumentMention(aJCas, protein.getBegin(), protein.getEnd()); uimaArg.setRef(protein); uimaArg.setRole(determineArgumentRole(uimaEvent, uimaArg, argPos));
/**
 * Tells whether the ID of the given protein is contained in
 * {@code writtenIds}.
 *
 * @param protein the gene/protein annotation whose ID is looked up
 * @return {@code true} if {@code writtenIds} contains the protein's ID
 */
public boolean isWritten(Gene protein) {
    final String proteinId = protein.getId();
    return writtenIds.contains(proteinId);
}
/**
 * Internal constructor used by the generator. Passes the address and type
 * on to the superclass and then invokes {@code readObject()}; what that
 * hook does is not visible from this constructor.
 *
 * @generated
 * @param addr low level Feature Structure reference
 * @param type the type of this Feature Structure
 */
public Gene(int addr, TOP_Type type) {
    super(addr, type);
    this.readObject();
}
if (reference instanceof Gene) { Gene protein = (Gene) reference; if (!proteinWriter.isWritten(protein) && protein.getSpecificType().equals("protein")) proteinWriter.writeProtein(protein); } else if (reference instanceof EntityMention) {
/**
 * Creates a Gene in the given JCas without setting any offsets, then
 * invokes {@code readObject()}; what that hook does is not visible from
 * this constructor.
 *
 * @generated
 * @param jcas JCas to which this Feature Structure belongs
 */
public Gene(JCas jcas) {
    super(jcas);
    this.readObject();
}