/** Creates a new JapaneseKatakanaStemFilterFactory */ public JapaneseKatakanaStemFilterFactory(Map<String,String> args) { super(args); minimumLength = getInt(args, MINIMUM_LENGTH_PARAM, JapaneseKatakanaStemFilter.DEFAULT_MINIMUM_LENGTH); if (minimumLength < 2) { throw new IllegalArgumentException("Illegal " + MINIMUM_LENGTH_PARAM + " " + minimumLength + " (must be 2 or greater)"); } if (!args.isEmpty()) { throw new IllegalArgumentException("Unknown parameters: " + args); } }
/** * Activate and read the properties. Configures and initialises a POSTagger for each language configured in * CONFIG_LANGUAGES. * * @param ce the {@link org.osgi.service.component.ComponentContext} */ @Activate protected void activate(ComponentContext ce) throws ConfigurationException, IOException { log.info("activating smartcn tokenizing engine"); super.activate(ce); //init the Solr ResourceLoader used for initialising the components //first a ResourceLoader for this classloader, 2nd one using the commons.solr.core classloader //and third the parentResourceLoader (if present). resourceLoader = new StanbolResourceLoader(KuromojiNlpEngine.class.getClassLoader(), new StanbolResourceLoader(parentResourceLoader)); tokenizerFactory = new JapaneseTokenizerFactory(TOKENIZER_FACTORY_CONFIG); ((ResourceLoaderAware) tokenizerFactory).inform(resourceLoader); //base form filter TokenFilterFactory baseFormFilterFactory = new JapaneseBaseFormFilterFactory(BASE_FORM_FILTER_CONFIG); filterFactories.add(baseFormFilterFactory); //POS filter TokenFilterFactory posFilterFactory = new JapanesePartOfSpeechStopFilterFactory(POS_FILTER_CONFIG); ((ResourceLoaderAware) posFilterFactory).inform(resourceLoader); filterFactories.add(posFilterFactory); //Stemming TokenFilterFactory stemmFilterFactory = new JapaneseKatakanaStemFilterFactory(STEMM_FILTER_CONFIG); filterFactories.add(stemmFilterFactory); }