@Override
public void initialize(UimaContext context) throws ResourceInitializationException {
  // Let the classifier model be resolved from the classpath before the
  // superclass reads its configuration parameters.
  RelationExtractorAnnotator.allowClassifierModelOnClasspath(context);
  super.initialize(context);
  // BIO chunking of BaseTokens into Modifier annotations, with the outcome
  // label suffix taken from the "typeID" feature (e.g. B-<typeID>, I-<typeID>).
  this.chunking = new BioChunking<>(BaseToken.class, Modifier.class, "typeID");
}
@Override
protected Map<SUB_CHUNK_TYPE, String> getSubChunkToOutcomeMap(
    JCas jCas, List<SUB_CHUNK_TYPE> chunkComponents, List<CHUNK_TYPE> chunks) {
  Feature feature = this.getFeature(jCas);
  Map<SUB_CHUNK_TYPE, String> subChunkToOutcome = new HashMap<>();
  for (CHUNK_TYPE chunk : chunks) {
    String suffix = this.getOutcomeSuffix(chunk, feature);
    // The first sub-chunk covered by the chunk is labeled "B" (begin);
    // every subsequent one is labeled "I" (inside).
    String prefix = "B";
    for (SUB_CHUNK_TYPE component : JCasUtil.selectCovered(this.subChunkClass, chunk)) {
      subChunkToOutcome.put(component, prefix + suffix);
      prefix = "I";
    }
  }
  return subChunkToOutcome;
}
// Derive per-token BIO outcome labels from the NamedEntityMention spans, then
// rebuild chunk annotations in the CAS from those labels.
// NOTE(review): presumably a label/annotation round-trip inside a larger
// process() method — confirm against the enclosing (not visible) definition.
List<String> outcomes = this.chunking.createOutcomes(jCas, tokens, namedEntityMentions); this.chunking.createChunks(jCas, tokens, outcomes);
// Derive per-token BIO outcome labels from the Time spans, write one
// classification instance per token (features paired with its outcome) via the
// data writer — presumably producing training data — and then rebuild chunk
// annotations from the same labels.
// NOTE(review): fragment of a larger method; confirm outcomes/featureLists are
// index-aligned with `tokens` in the enclosing (not visible) code.
List<String> outcomes = this.chunking.createOutcomes(jCas, tokens, times); this.dataWriter.write(Instances.toInstances(outcomes, featureLists)); this.chunking.createChunks(jCas, tokens, outcomes);
@Override
public void initialize(UimaContext context) throws ResourceInitializationException {
  // Permit loading the classifier model from the classpath, then run the
  // normal superclass initialization.
  RelationExtractorAnnotator.allowClassifierModelOnClasspath(context);
  super.initialize(context);
  // Define how BaseTokens are chunked into Modifier annotations: BIO encoding,
  // with outcome suffixes drawn from the "typeID" feature.
  this.chunking = new BioChunking<>(BaseToken.class, Modifier.class, "typeID");
}
@Override public void initialize(UimaContext context) throws ResourceInitializationException { super.initialize(context); // define chunking type this.chunking = new BioChunking<Token, Time>(Token.class, Time.class); // add features: word, character pattern, stem, pos this.tokenFeatureExtractors = Lists.newArrayList(); this.tokenFeatureExtractors.add(new CoveredTextExtractor<Token>()); NamedFeatureExtractor1<Token> ex = CharacterCategoryPatternFunction.createExtractor(); this.tokenFeatureExtractors.add(ex); this.tokenFeatureExtractors.add(new TimeWordsExtractor<Token>()); this.tokenFeatureExtractors.add(new TypePathExtractor<Token>(Token.class, "stem")); this.tokenFeatureExtractors.add(new TypePathExtractor<Token>(Token.class, "pos")); // add window of features before and after this.contextFeatureExtractors = Lists.newArrayList(); for (FeatureExtractor1<Token> extractor : this.tokenFeatureExtractors) { this.contextFeatureExtractors.add(new CleartkExtractor<Token, Token>(Token.class, extractor, new Preceding( 3), new Following(3))); } }
@Override public void initialize(UimaContext context) throws ResourceInitializationException { super.initialize(context); // the token feature extractor: text, char pattern (uppercase, digits, etc.), and part-of-speech this.extractor = new CombinedExtractor1<Token>( new FeatureFunctionExtractor<Token>( new CoveredTextExtractor<Token>(), new CharacterCategoryPatternFunction<Token>(PatternType.REPEATS_MERGED)), new TypePathExtractor<Token>(Token.class, "pos")); // the context feature extractor: the features above for the 3 preceding and 3 following tokens this.contextExtractor = new CleartkExtractor<Token, Token>( Token.class, this.extractor, new Preceding(3), new Following(3)); // the chunking definition: Tokens will be combined to form NamedEntityMentions, with labels // from the "mentionType" attribute so that we get B-location, I-person, etc. this.chunking = new BioChunking<Token, NamedEntityMention>( Token.class, NamedEntityMention.class, "mentionType"); }