/**
 * Debug helper that prints to standard output where a section starts in the
 * document text of the given CAS.
 * <p>
 * For a negative {@code begin} a line containing {@code -1} and the document id
 * is printed. Otherwise, if {@code begin} lies inside the document text, a
 * preview of at most 50 characters starting at {@code begin} is printed together
 * with the document id and the offset. Nothing is printed when {@code begin} is
 * at or past the end of the text, or when the CAS has no document text.
 *
 * @param desc  label identifying the section in the printed line
 * @param cas   CAS whose document text and header document id are read
 * @param begin character offset of the section start; negative means "not found"
 */
private void printSectionStart(String desc, JCas cas, int begin) {
    String str = cas.getDocumentText();
    if (begin < 0) {
        System.out.println("AAAA - " + desc + " - -1 - " + BlueCasUtil.getHeaderDocId(cas));
        return;
    }
    if (str == null || begin >= str.length()) {
        // no text, or offset outside the text: nothing to preview
        return;
    }
    // substring's end index is exclusive, so clamp to length() (not length() - 1),
    // otherwise the last character of the document is cut off
    int end = Math.min(begin + 50, str.length());
    System.out.println("AAAA - " + desc + " (" + BlueCasUtil.getHeaderDocId(cas) + ":" + begin + "): "
            + str.substring(begin, end));
}
@Override public void process(JCas jCas) throws AnalysisEngineProcessException { String title = getTitle(jCas); String text = jCas.getDocumentText(); // add title to text if too small if (text.length() < minTextLenght && title.length() > 0) { text = title + " " + text; } // only detect if text is long enough if (text != null && text.length() > minTextLenght) { // TODO maybe cut if text too long --> slower try { jCas.setDocumentLanguage(detect(text)); } catch (LangDetectException e) { LOG.warn("error detecting language for {}, {}", getHeaderDocId(jCas), e); } } }
Cooccurrence[] array = coocs.toArray(new Cooccurrence[coocs.size()]); Preconditions.checkArgument(predictedLabels.size() == coocs.size(), "pmid" + getHeaderDocId(jCas) + " should have same # of elems, but was: coocs=" + coocs.size() + " and predictedLabels="
public static void expandAbbreviations(JCas jCas) { String pmId = getHeaderDocId(jCas); // otherwise was very slow Map<Abbreviation, List<Annotation>> cache = newHashMap(); List<Abbreviation> tmp = newLinkedList(select(jCas, Abbreviation.class)); for (Abbreviation abrev : tmp) { Annotation reference = abrev.getTextReference(); if (reference != null && reference instanceof Abbreviation) { Abbreviation aRef = (Abbreviation) reference; List<Annotation> covereds; if (cache.containsKey(aRef)) covereds = cache.get(aRef); else { covereds = getCovered(jCas, aRef, pmId); cache.put(aRef, covereds); } // copy them to the other abbreviation short-forms for (Annotation covered : covereds) { Annotation clone = (Annotation) covered.clone(); clone.setBegin(abrev.getBegin()); clone.setEnd(abrev.getEnd()); clone.addToIndexes(jCas); if (!clone.getCoveredText().equals(aRef.getCoveredText())) LOG.warn("'{}' not matching2 '{}' in " + pmId, clone.getCoveredText(), aRef.getCoveredText()); } } } }
+ "' in pmid " + getHeaderDocId(jCas), e);
List<SentenceExample> retSentences = newArrayList(); String pmId = getHeaderDocId(jCas); int sentenceId = 0;
LOG.debug("Wordnet exception while processing >" + t.getCoveredText() + "< [" + t.getBegin() + ":" + t.getEnd() + "] from doc " + getHeaderDocId(jCas));