/**
 * Looks up the {@link LanguageProcessingConfig} that applies to the
 * passed language.<p>
 * Languages that are not included in the configuration yield
 * <code>null</code>. Included languages without a language specific
 * configuration are answered with the {@link #getDefaults()}.<p>
 * Use {@link #getLanguageSpecificConfig(String)} to retrieve only the
 * language specific configuration.
 * @param language the language
 * @return the language specific configuration, the default configuration
 * if no specific one is registered, or <code>null</code> if the language
 * is not configured to be processed.
 */
public LanguageProcessingConfig getConfiguration(String language) {
    // languages outside of the configuration are not processed at all
    if(!languages.isLanguage(language)){
        return null;
    }
    LanguageProcessingConfig specific = languageConfigs.get(language);
    if(specific != null){
        return specific;
    }
    // no language specific settings: fall back to the defaults
    return defaultConfig;
}
/**
 * Looks up the {@link LanguageProcessingConfig} that applies to the
 * passed language.<p>
 * Languages that are not included in the configuration yield
 * <code>null</code>. Included languages without a language specific
 * configuration are answered with the {@link #getDefaults()}.<p>
 * Use {@link #getLanguageSpecificConfig(String)} to retrieve only the
 * language specific configuration.
 * @param language the language
 * @return the language specific configuration, the default configuration
 * if no specific one is registered, or <code>null</code> if the language
 * is not configured to be processed.
 */
public LanguageProcessingConfig getConfiguration(String language) {
    // languages outside of the configuration are not processed at all
    if(!languages.isLanguage(language)){
        return null;
    }
    LanguageProcessingConfig specific = languageConfigs.get(language);
    if(specific != null){
        return specific;
    }
    // no language specific settings: fall back to the defaults
    return defaultConfig;
}
/**
 * Collects the tokenizer {@link ServiceReference}s applicable to the
 * passed language, stores them in {@link #langTokenizers} under that
 * language and returns them.<p>
 * A reference is included when a {@link LanguageConfiguration} is
 * registered for it in <code>ref2LangConfig</code> and that configuration
 * accepts the language. If more than one reference matches, the list is
 * sorted by the natural ordering of {@link ServiceReference}.
 * @param language the language to initialise the tokenizers for
 * @return the (possibly empty) list of matching references; never
 * <code>null</code>
 */
@SuppressWarnings("unchecked")
private List<ServiceReference> initTokenizers(String language) {
    List<ServiceReference> tokenizers = new ArrayList<ServiceReference>();
    // Take a single snapshot of the tracked references. Asking the tracker
    // twice (once for the null check, once for the iteration) can result in
    // a NullPointerException if the last tracked service is unregistered
    // between the two calls.
    ServiceReference[] trackedRefs = labelTokenizerTracker.getServiceReferences();
    if(trackedRefs != null){
        for(ServiceReference ref : trackedRefs){
            LanguageConfiguration langConf = ref2LangConfig.get(ref);
            if(langConf != null && langConf.isLanguage(language)){
                tokenizers.add(ref);
            }
        }
    }
    if(tokenizers.size() > 1){
        Collections.sort(tokenizers);
    }
    this.langTokenizers.put(language, tokenizers);
    return tokenizers;
}
/**
 * Checks if a language is enabled by the language configuration of an
 * engine. Used in {@link #canEnhance(ContentItem)} to decide if a
 * {@link ContentItem} should be processed.
 * @param engine the {@link EnhancementEngine} calling this method. Its
 * name is included in the exception message
 * @param languageConfiguration the language configuration
 * @param language the language
 * @param exception <code>false</code> if used in {@link #canEnhance(ContentItem)}
 * and <code>true</code> when called from {@link #computeEnhancements(ContentItem)}
 * @return <code>true</code> if the language is configured; <code>false</code>
 * if it is not configured and exception is <code>false</code>
 * @throws IllegalStateException if exception is <code>true</code> and the
 * language is not configured as being processed.
 */
public static boolean isLangaugeConfigured(EnhancementEngine engine,
        LanguageConfiguration languageConfiguration, String language,
        boolean exception){
    final boolean configured = languageConfiguration.isLanguage(language);
    // report the state unless the caller requested an exception for
    // languages that are not configured
    if(configured || !exception){
        return configured;
    }
    throw new IllegalStateException("Language "+language+" is not included "
            + "by the LanguageConfiguration of this engine (name "+ engine.getName()
            + "). As this is also checked in canEnhancer this may indicate an Bug in the "
            + "used EnhancementJobManager!");
}
/**
 * Checks if a language is enabled by the language configuration of an
 * engine. Used in {@link #canEnhance(ContentItem)} to decide if a
 * {@link ContentItem} should be processed.
 * @param engine the {@link EnhancementEngine} calling this method. Its
 * name is included in the exception message
 * @param languageConfiguration the language configuration
 * @param language the language
 * @param exception <code>false</code> if used in {@link #canEnhance(ContentItem)}
 * and <code>true</code> when called from {@link #computeEnhancements(ContentItem)}
 * @return <code>true</code> if the language is configured; <code>false</code>
 * if it is not configured and exception is <code>false</code>
 * @throws IllegalStateException if exception is <code>true</code> and the
 * language is not configured as being processed.
 */
public static boolean isLangaugeConfigured(EnhancementEngine engine,
        LanguageConfiguration languageConfiguration, String language,
        boolean exception){
    final boolean configured = languageConfiguration.isLanguage(language);
    // report the state unless the caller requested an exception for
    // languages that are not configured
    if(configured || !exception){
        return configured;
    }
    throw new IllegalStateException("Language "+language+" is not included "
            + "by the LanguageConfiguration of this engine (name "+ engine.getName()
            + "). As this is also checked in canEnhancer this may indicate an Bug in the "
            + "used EnhancementJobManager!");
}
/**
 * Collects the tokenizer {@link ServiceReference}s applicable to the
 * passed language, stores them in {@link #langTokenizers} under that
 * language and returns them.<p>
 * A reference is included when a {@link LanguageConfiguration} is
 * registered for it in <code>ref2LangConfig</code> and that configuration
 * accepts the language. If more than one reference matches, the list is
 * sorted by the natural ordering of {@link ServiceReference}.
 * @param language the language to initialise the tokenizers for
 * @return the (possibly empty) list of matching references; never
 * <code>null</code>
 */
@SuppressWarnings("unchecked")
private List<ServiceReference> initTokenizers(String language) {
    List<ServiceReference> tokenizers = new ArrayList<ServiceReference>();
    // Take a single snapshot of the tracked references. Asking the tracker
    // twice (once for the null check, once for the iteration) can result in
    // a NullPointerException if the last tracked service is unregistered
    // between the two calls.
    ServiceReference[] trackedRefs = labelTokenizerTracker.getServiceReferences();
    if(trackedRefs != null){
        for(ServiceReference ref : trackedRefs){
            LanguageConfiguration langConf = ref2LangConfig.get(ref);
            if(langConf != null && langConf.isLanguage(language)){
                tokenizers.add(ref);
            }
        }
    }
    if(tokenizers.size() > 1){
        Collections.sort(tokenizers);
    }
    this.langTokenizers.put(language, tokenizers);
    return tokenizers;
}
return CANNOT_ENHANCE; if(!languageConfig.isLanguage(language)){ log.trace(" > can NOT enhance ContentItem {} because language {} is " + "not enabled by this engines configuration",ci,language);
return CANNOT_ENHANCE; if(!languageConfig.isLanguage(language)){ log.trace(" > can NOT enhance ContentItem {} because language {} is " + "not enabled by this engines configuration",ci,language);
return CANNOT_ENHANCE; if(!languageConfig.isLanguage(language)){ log.trace(" > can NOT enhance ContentItem {} because language {} is " + "not enabled by this engines configuration",ci,language);
return CANNOT_ENHANCE; if(!languageConfig.isLanguage(language)){ log.trace(" > can NOT enhance ContentItem {} because language {} is " + "not enabled by this engines configuration",ci,language);
langConf.isLanguage(language)){ if(label.isEmpty()){ return EMPTY;
@Override public String[] tokenize(String label, String language) { if(label == null){ throw new IllegalArgumentException("The parsed Label MUST NOT be NULL!"); } if(languageConfig.isLanguage(language)){ String modelName = languageConfig.getParameter(language, PARAM_MODEL); if(modelName != null){ try { TokenizerModel model = openNlp.getModel(TokenizerModel.class, modelName, null); return new TokenizerME(model).tokenize(label); } catch (Exception e) { log.warn("Unable to load configured TokenizerModel '"+modelName + "' for language '"+language + "! Fallback to default Tokenizers",e); } } //fallback to the defaults return openNlp.getTokenizer(language).tokenize(label); } else { //language not configured return null; } }
langConf.isLanguage(language)){ if(label.isEmpty()){ return EMPTY;
String language = getLanguage(this, ci, false); if (language == null || !indexConfig.getFstConfig().isLanguage(language)) { log.debug("Engine {} ignores ContentItem {} becuase language {} is not condigured.", new Object[] {getName(), ci.getUri(), language});
if(language != null && //successfully parsed language fstConfig.isLanguage(language) &&