/**
 * Writes a single protein entry for the given gene to {@code writer}.
 * <p>
 * The emitted line consists of the gene ID, a tab, the literal {@code Protein}
 * followed by the space-separated begin and end offsets, another tab, the
 * substring of {@code documentText} between those offsets, and a newline.
 * <p>
 * The gene ID is recorded in {@code writtenIds} if it is not already contained
 * there. The line is written regardless of whether the ID was already recorded.
 *
 * @param protein the gene annotation to write
 * @throws IOException if writing to the underlying writer fails
 */
public void writeProtein(Gene protein) throws IOException {
    final String proteinId = protein.getId();

    // Track the ID; an ID that is already known is not added a second time.
    if (!writtenIds.contains(proteinId)) {
        writtenIds.add(proteinId);
    }

    final int begin = protein.getBegin();
    final int end = protein.getEnd();
    final String mention = documentText.substring(begin, end);

    final StringBuilder entry = new StringBuilder();
    entry.append(proteinId)
            .append("\tProtein ")
            .append(begin)
            .append(" ")
            .append(end)
            .append("\t")
            .append(mention)
            .append("\n");

    writer.write(entry.toString());
}
/**
 * Renders the given proteins as text lines, one line per protein.
 * <p>
 * The protein lines have to match the Shared Task 2011 format as used here,
 * i.e. five tab-separated columns:
 * <pre>
 * ID&lt;tab&gt;Protein&lt;tab&gt;start&lt;tab&gt;end&lt;tab&gt;mention text
 * </pre>
 * Example: {@code T3 Protein 166 174 TGF-beta} (columns separated by tabs).
 * <p>
 * A protein whose values cannot be read is logged and left out of the
 * result; the remaining proteins are still rendered.
 *
 * @param proteins
 *            the proteins to render, keyed by the ID that is printed in the
 *            first column
 * @return the formatted lines in the iteration order of the map's entry
 *         set; never {@code null}
 */
private List<String> getProteinLines(Map<String, Gene> proteins) {
    List<String> proteinLines = new ArrayList<>(proteins.size());
    for (Entry<String, Gene> proteinEntry : proteins.entrySet()) {
        String id = proteinEntry.getKey();
        Gene gene = proteinEntry.getValue();
        try {
            proteinLines.add(
                    id + "\tProtein\t" + gene.getBegin() + "\t" + gene.getEnd() + "\t" + gene.getCoveredText());
        } catch (Exception e) {
            // Best effort: a single faulty annotation must not abort the whole
            // export. The exception is handed to the logger as the last
            // argument so that its stack trace ends up in the log instead of
            // on stderr.
            log.error("Failed to process Protein with its values. ID: {}", id, e);
        }
    }
    return proteinLines;
}
/** * Assigns an ID of the form <tt>Ti</tt> to each gene in the CAS, <tt>i</tt> * being an enumeration number beginning at 0. * * @param aJCas * @return */ private Map<String, Gene> enumerateProteins(JCas aJCas) { int i = 0; // just enumerate all genes Map<String, Gene> proteins = new HashMap<>(); FSIterator<Annotation> geneIt = aJCas.getAnnotationIndex(Gene.type).iterator(); // lastEnd holds the end offset of the gene in the prior iteration; we // use it to avoid overlapping genes. Those would most likely be an // error and cause errors in BioSem int lastEnd = 0; while (geneIt.hasNext()) { Gene gene = (Gene) geneIt.next(); if (gene.getBegin() < lastEnd) continue; String id = gene.getId(); // if (StringUtils.isBlank(id)) id = "T" + i++; gene.setId(id); proteins.put(id, gene); lastEnd = gene.getEnd(); } log.debug("Got {} non-overlapping genes/proteins in the document.", proteins.size()); return proteins; }
// Create an argument mention covering exactly the protein's begin/end offsets.
uimaArg = new ArgumentMention(aJCas, protein.getBegin(), protein.getEnd());
// Link the argument back to the protein annotation it stands for.
uimaArg.setRef(protein);
// The role is derived from the event, the argument itself and its position.
uimaArg.setRole(determineArgumentRole(uimaEvent, uimaArg, argPos));