/**
 * Returns the value of {@code uimaToken.getEnd()}.
 *
 * @return whatever the wrapped {@code uimaToken} reports as its end
 */
public int getEnd() {
    return uimaToken.getEnd();
}
/**
 * Returns the value of {@code uimaToken.getEnd()}.
 *
 * @return whatever the wrapped {@code uimaToken} reports as its end
 */
public int getEnd() {
    return uimaToken.getEnd();
}
/** Delegates to {@code token.getEnd()} and returns its result. */
// NOTE(review): the final brace on the next line closes an enclosing type whose header is not visible here.
private int getEnd() { return token.getEnd(); } }
/**
 * Collects every {@code Token} selected from the given JCas whose begin is not smaller than
 * {@code from} and whose end is not larger than {@code to}.
 *
 * @param aJCas the JCas the tokens are selected from
 * @param from  lower bound (inclusive) for a token's begin
 * @param to    upper bound (inclusive) for a token's end
 * @return a new list with the matching tokens, in the order {@code JCasUtil.select} yields them
 */
private static List<Token> tokensBetween(JCas aJCas, int from, int to) {
    List<Token> covered = new ArrayList<Token>();
    for (Token candidate : JCasUtil.select(aJCas, Token.class)) {
        boolean startsTooEarly = candidate.getBegin() < from;
        // The end is only inspected when the begin already qualifies.
        if (startsTooEarly || candidate.getEnd() > to) {
            continue;
        }
        covered.add(candidate);
    }
    return covered;
}
/**
 * Looks up the token in the given list (e.g. the tokens of a sentence) whose begin and end
 * are exactly {@code aBegin} and {@code aEnd}.
 *
 * @param aTokens the tokens to search, scanned in list order
 * @param aBegin  the begin the token must have
 * @param aEnd    the end the token must have
 * @return the first token in the list with exactly these boundaries
 * @throws IllegalStateException if no token in the list has these boundaries
 */
private Token getToken(List<Token> aTokens, int aBegin, int aEnd) {
    for (Token candidate : aTokens) {
        if (candidate.getBegin() != aBegin) {
            continue;
        }
        if (candidate.getEnd() == aEnd) {
            return candidate;
        }
    }
    throw new IllegalStateException("Token not found");
}
/**
 * Visits every {@code Token} selected from {@code aInput} and, for each one whose covered
 * text is matched in full by {@code regexPattern}, calls
 * {@code replace(begin, end, replacement)} with the token's boundaries.
 *
 * @param aInput  the JCas the tokens are selected from
 * @param aOutput not referenced directly in this method
 * @throws AnalysisEngineProcessException declared by the overridden method
 */
@Override
public void process(JCas aInput, JCas aOutput) throws AnalysisEngineProcessException {
    for (Token token : select(aInput, Token.class)) {
        // matches() requires the pattern to cover the whole token text, not just a part of it.
        boolean wholeTokenMatches = regexPattern.matcher(token.getCoveredText()).matches();
        if (wholeTokenMatches) {
            replace(token.getBegin(), token.getEnd(), replacement);
        }
    }
}
/**
 * Visits every {@code Token} selected from {@code aInput} and, for each one whose covered
 * text is contained in {@code tokensToReplace}, calls
 * {@code replace(begin, end, replacement)} with the token's boundaries.
 *
 * <p>When {@code ignoreCase} is set, the covered text is lower-cased before the lookup.
 *
 * @param aInput  the JCas the tokens are selected from
 * @param aOutput not referenced directly in this method
 * @throws AnalysisEngineProcessException declared by the overridden method
 */
@Override
public void process(JCas aInput, JCas aOutput) throws AnalysisEngineProcessException {
    for (Token token : select(aInput, Token.class)) {
        String surface = token.getCoveredText();
        // NOTE(review): toLowerCase() without a Locale uses the JVM default locale; confirm
        // that the entries of tokensToReplace are normalized the same way.
        String lookupKey = ignoreCase ? surface.toLowerCase() : surface;
        if (tokensToReplace.contains(lookupKey)) {
            replace(token.getBegin(), token.getEnd(), replacement);
        }
    }
}
/**
 * Creates one {@code Chunk} per entry of {@code aChunkStartEndIdx} and adds it to the
 * indexes. Does nothing unless {@code readToken} is set.
 *
 * <p>Each entry holds two positions into the list of tokens selected from the JCas: element 0
 * is the first token of the chunk, element 1 the last one (inclusive). The chunk spans from
 * the begin of the first token to the end of the last token.
 *
 * @param aJCas              the JCas the tokens are read from and the chunks are created in
 * @param aChunkStartEndIdx  pairs of token positions, one pair per chunk
 */
private void annotateChunks(JCas aJCas, List<int[]> aChunkStartEndIdx) {
    if (!readToken) {
        return;
    }

    List<Token> tokenList = new ArrayList<Token>(JCasUtil.select(aJCas, Token.class));
    for (int[] tokenRange : aChunkStartEndIdx) {
        int chunkBegin = tokenList.get(tokenRange[0]).getBegin();
        int chunkEnd = tokenList.get(tokenRange[1]).getEnd();
        new Chunk(aJCas, chunkBegin, chunkEnd).addToIndexes();
    }
}
/**
 * Determines the position in {@code tokens} of the token that has exactly the same begin and
 * end as {@code candidate}.
 *
 * <p>The whole list is always scanned; if several tokens match, the position of the last
 * one is returned.
 *
 * @param candidate the annotation whose boundaries are looked up
 * @param tokens    the tokens to search
 * @return the zero-based position of the last matching token, or {@code -1} if none matches
 */
public static int getCandidatePosition(Annotation candidate, List<Token> tokens) {
    int lastMatch = -1;
    int index = 0;
    for (Token current : tokens) {
        boolean sameSpan = current.getBegin() == candidate.getBegin()
                && current.getEnd() == candidate.getEnd();
        if (sameSpan) {
            lastMatch = index;
        }
        index++;
    }
    return lastMatch;
}
/**
 * Stores the document text of the given JCas in {@code text} and rebuilds {@code tokens} as a
 * fresh list holding one {@code Span(begin, end)} per {@code Token} selected from the JCas.
 *
 * @param aJCas the JCas to read the document text and tokens from
 */
@Override
public void init(JCas aJCas) {
    text = aJCas.getDocumentText();

    tokens = new ArrayList<>();
    for (Token current : select(aJCas, Token.class)) {
        tokens.add(new Span(current.getBegin(), current.getEnd()));
    }
}
/**
 * Determines the position in {@code tokens} of the token that has exactly the same begin and
 * end as {@code candidate}.
 *
 * <p>The whole list is always scanned; if several tokens match, the position of the last
 * one is returned.
 *
 * @param candidate the annotation whose boundaries are looked up
 * @param tokens    the tokens to search
 * @return the zero-based position of the last matching token, or {@code -1} if none matches
 */
public static int getCandidatePosition(Annotation candidate, List<Token> tokens) {
    int lastMatch = -1;
    int index = 0;
    for (Token current : tokens) {
        boolean sameSpan = current.getBegin() == candidate.getBegin()
                && current.getEnd() == candidate.getEnd();
        if (sameSpan) {
            lastMatch = index;
        }
        index++;
    }
    return lastMatch;
}
/**
 * Renders a token as its covered text followed by its begin and end in square brackets,
 * separated by a colon, e.g. {@code word[0:4]}.
 *
 * @param t the token to render
 * @return the formatted string
 */
private static String outToken(Token t) {
    final String coveredText = t.getCoveredText();
    final int begin = t.getBegin();
    final int end = t.getEnd();
    return String.format("%s[%s:%s]", coveredText, begin, end);
}
/**
 * Pairs the i-th token from {@code getTokens} with the i-th entry from
 * {@code getPredictions}: for each token a POS annotation is created from the predicted
 * outcome over the token's boundaries and then set on the token via {@code setPos}.
 *
 * <p>The loop is driven by the number of tokens; the predictions are accessed by the same
 * position.
 *
 * @param aJCas the JCas holding the tokens and predictions
 */
private void annotateTaggingResultsLinkToTokens(JCas aJCas) {
    List<Token> tokens = getTokens(aJCas);
    List<TextClassificationOutcome> predictions = getPredictions(aJCas);

    int index = 0;
    while (index < tokens.size()) {
        Token current = tokens.get(index);
        String label = predictions.get(index).getOutcome();
        current.setPos(createPartOfSpeechAnnotationFromOutcome(aJCas, current.getBegin(),
                current.getEnd(), label));
        index++;
    }
}
/**
 * Adds one {@code PhoneticTranscription} to the indexes for every {@code Token} selected
 * from the JCas. The annotation covers the token's boundaries, carries the result of
 * {@code encode} applied to the token's covered text as its transcription, and the class
 * name of {@code encoder} as its name.
 *
 * @param jcas the JCas to read tokens from and add annotations to
 * @throws AnalysisEngineProcessException declared by the overridden method
 */
@Override
public void process(JCas jcas) throws AnalysisEngineProcessException {
    for (Token current : JCasUtil.select(jcas, Token.class)) {
        int begin = current.getBegin();
        int end = current.getEnd();

        PhoneticTranscription annotation = new PhoneticTranscription(jcas, begin, end);
        annotation.setTranscription(encode(current.getCoveredText()));
        annotation.setName(encoder.getClass().getName());
        annotation.addToIndexes();
    }
}
/**
 * Adds a {@code Span} without any value over every token of the one-sentence JCas and checks
 * the written output with {@code Span} configured as span layer.
 */
@Test
public void testTokenBoundedSpanWithoutFeatureValue() throws Exception {
    JCas jcas = makeJCasOneSentence();

    for (Token token : select(jcas, Token.class)) {
        new Span(jcas, token.getBegin(), token.getEnd()).addToIndexes();
    }

    writeAndAssertEquals(jcas, WebannoTsv3Writer.PARAM_SPAN_LAYERS, asList(Span.class));
}
/**
 * Adds a {@code Span} whose value is a single underscore over every token of the
 * one-sentence JCas and checks the written output with {@code Span} configured as span layer.
 */
@Test
public void testTokenBoundedSpanWithUnderscoreFeatureValue() throws Exception {
    JCas jcas = makeJCasOneSentence();
    final String value = "_";

    for (Token token : select(jcas, Token.class)) {
        Span span = new Span(jcas, token.getBegin(), token.getEnd());
        span.setValue(value);
        span.addToIndexes();
    }

    writeAndAssertEquals(jcas, WebannoTsv3Writer.PARAM_SPAN_LAYERS, asList(Span.class));
}
/**
 * Adds a {@code Span} whose value is a single asterisk over every token of the one-sentence
 * JCas and checks the written output with {@code Span} configured as span layer.
 */
@Test
public void testTokenBoundedSpanWithAsteriskFeatureValue() throws Exception {
    JCas jcas = makeJCasOneSentence();
    final String value = "*";

    for (Token token : select(jcas, Token.class)) {
        Span span = new Span(jcas, token.getBegin(), token.getEnd());
        span.setValue(value);
        span.addToIndexes();
    }

    writeAndAssertEquals(jcas, WebannoTsv3Writer.PARAM_SPAN_LAYERS, asList(Span.class));
}
/**
 * Adds a {@code Span} whose value looks like a qualified type-and-feature name over every
 * token of the one-sentence JCas and checks the written output with {@code Span} configured
 * as span layer.
 */
@Test
public void testTokenBoundedSpanWithNastyFeatureValue() throws Exception {
    JCas jcas = makeJCasOneSentence();
    final String value = "de.tudarmstadt.ukp.dkpro.core.api.ner.type.NamedEntity:value";

    for (Token token : select(jcas, Token.class)) {
        Span span = new Span(jcas, token.getBegin(), token.getEnd());
        span.setValue(value);
        span.addToIndexes();
    }

    writeAndAssertEquals(jcas, WebannoTsv3Writer.PARAM_SPAN_LAYERS, asList(Span.class));
}
/**
 * Adds a {@code Span} whose value consists of punctuation, brackets, quotes, a tab and a
 * line break over every token of the one-sentence JCas and checks the written output with
 * {@code Span} configured as span layer.
 */
@Test
public void testTokenBoundedSpanWithSpecialSymbolsValue() throws Exception {
    JCas jcas = makeJCasOneSentence();
    final String value = "#*'\"`´\t:;{}|[ ]()\\§$%?=&_\n";

    for (Token token : select(jcas, Token.class)) {
        Span span = new Span(jcas, token.getBegin(), token.getEnd());
        span.setValue(value);
        span.addToIndexes();
    }

    writeAndAssertEquals(jcas, WebannoTsv3Writer.PARAM_SPAN_LAYERS, asList(Span.class));
}
/**
 * Adds a {@code Span} over every token of the one-sentence JCas whose value starts with
 * {@code B-} for the first token and {@code I-} for all following ones, and checks the
 * written output with {@code Span} configured as span layer.
 */
@Test
public void testTokenBoundedBioLookAlike() throws Exception {
    JCas jcas = makeJCasOneSentence();

    boolean first = true;
    for (Token token : select(jcas, Token.class)) {
        String prefix = first ? "B-" : "I-";
        Span span = new Span(jcas, token.getBegin(), token.getEnd());
        span.setValue(prefix + "NOTBIO!");
        span.addToIndexes();
        first = false;
    }

    writeAndAssertEquals(jcas, WebannoTsv3Writer.PARAM_SPAN_LAYERS, asList(Span.class));
}