/** Creates a new JapanesePartOfSpeechStopFilterFactory */ public JapanesePartOfSpeechStopFilterFactory(Map<String,String> args) { super(args); stopTagFiles = get(args, "tags"); if (!args.isEmpty()) { throw new IllegalArgumentException("Unknown parameters: " + args); } }
/**
 * Loads the stop tags named by {@code stopTagFiles} using the given resource loader.
 *
 * <p>When {@code getWordSet} returns {@code null}, {@code stopTags} is left as {@code null};
 * otherwise every entry of the returned set is converted from {@code char[]} to a
 * {@link String} and collected into a fresh {@link HashSet}.
 *
 * @param loader the resource loader handed to {@code getWordSet}
 * @throws IOException as declared; presumably raised when the tag resources cannot be read
 */
@Override
public void inform(ResourceLoader loader) throws IOException {
  // Reset up front: if loading throws or yields no set, stopTags remains null.
  stopTags = null;

  final CharArraySet loadedTags = getWordSet(loader, stopTagFiles, false);
  if (loadedTags == null) {
    return;
  }

  stopTags = new HashSet<>();
  for (Object entry : loadedTags) {
    // The set is iterated as Object; each entry is cast to char[] before conversion.
    stopTags.add(String.valueOf((char[]) entry));
  }
}
/** * Activate and read the properties. Configures and initialises a POSTagger for each language configured in * CONFIG_LANGUAGES. * * @param ce the {@link org.osgi.service.component.ComponentContext} */ @Activate protected void activate(ComponentContext ce) throws ConfigurationException, IOException { log.info("activating smartcn tokenizing engine"); super.activate(ce); //init the Solr ResourceLoader used for initialising the components //first a ResourceLoader for this classloader, 2nd one using the commons.solr.core classloader //and third the parentResourceLoader (if present). resourceLoader = new StanbolResourceLoader(KuromojiNlpEngine.class.getClassLoader(), new StanbolResourceLoader(parentResourceLoader)); tokenizerFactory = new JapaneseTokenizerFactory(TOKENIZER_FACTORY_CONFIG); ((ResourceLoaderAware) tokenizerFactory).inform(resourceLoader); //base form filter TokenFilterFactory baseFormFilterFactory = new JapaneseBaseFormFilterFactory(BASE_FORM_FILTER_CONFIG); filterFactories.add(baseFormFilterFactory); //POS filter TokenFilterFactory posFilterFactory = new JapanesePartOfSpeechStopFilterFactory(POS_FILTER_CONFIG); ((ResourceLoaderAware) posFilterFactory).inform(resourceLoader); filterFactories.add(posFilterFactory); //Stemming TokenFilterFactory stemmFilterFactory = new JapaneseKatakanaStemFilterFactory(STEMM_FILTER_CONFIG); filterFactories.add(stemmFilterFactory); }