protected static TokenizerFactory<? extends HasWord> chooseTokenizerFactory(boolean tokenize, String tokenizerFactory, String tokenizerOptions, boolean invertible) { if (tokenize && tokenizerFactory.trim().length() != 0) { //return (TokenizerFactory<? extends HasWord>) Class.forName(getTokenizerFactory()).newInstance(); try { @SuppressWarnings({"unchecked"}) Class<TokenizerFactory<? extends HasWord>> clazz = (Class<TokenizerFactory<? extends HasWord>>) Class.forName(tokenizerFactory.trim()); Method factoryMethod = clazz.getMethod("newTokenizerFactory"); @SuppressWarnings({"unchecked"}) TokenizerFactory<? extends HasWord> factory = (TokenizerFactory<? extends HasWord>) factoryMethod.invoke(tokenizerOptions); return factory; } catch (Exception e) { throw new RuntimeException("Could not load tokenizer factory", e); } } else if (tokenize) { if (invertible) { if (tokenizerOptions.equals("")) { tokenizerOptions = "invertible=true"; } else if (!tokenizerOptions.matches("(^|.*,)invertible=true")) { tokenizerOptions += ",invertible=true"; } return PTBTokenizerFactory.newCoreLabelTokenizerFactory(tokenizerOptions); } else { return PTBTokenizerFactory.newWordTokenizerFactory(tokenizerOptions); } } else { return WhitespaceTokenizer.factory(); } }
protected TaggerWrapper(MaxentTagger tagger) { this.tagger = tagger; this.config = tagger.config; try { tokenizerFactory = chooseTokenizerFactory(config.getTokenize(), config.getTokenizerFactory(), config.getTokenizerOptions(), config.getTokenizerInvertible()); } catch (Exception e) { log.info("Error in tokenizer factory instantiation for class: " + config.getTokenizerFactory()); e.printStackTrace(); tokenizerFactory = PTBTokenizerFactory.newWordTokenizerFactory(config.getTokenizerOptions()); } outputStyle = OutputStyle.fromShortName(config.getOutputFormat()); outputVerbosity = config.getOutputVerbosity(); outputLemmas = config.getOutputLemmas(); morpha = (outputLemmas) ? new Morphology() : null; tokenize = config.getTokenize(); // tagSeparator = config.getTagSeparator(); }
protected static TokenizerFactory<? extends HasWord> chooseTokenizerFactory(boolean tokenize, String tokenizerFactory, String tokenizerOptions, boolean invertible) { if (tokenize && tokenizerFactory.trim().length() != 0) { //return (TokenizerFactory<? extends HasWord>) Class.forName(getTokenizerFactory()).newInstance(); try { @SuppressWarnings({"unchecked"}) Class<TokenizerFactory<? extends HasWord>> clazz = (Class<TokenizerFactory<? extends HasWord>>) Class.forName(tokenizerFactory.trim()); Method factoryMethod = clazz.getMethod("newTokenizerFactory"); @SuppressWarnings({"unchecked"}) TokenizerFactory<? extends HasWord> factory = (TokenizerFactory<? extends HasWord>) factoryMethod.invoke(tokenizerOptions); return factory; } catch (Exception e) { throw new RuntimeException("Could not load tokenizer factory", e); } } else if (tokenize) { if (invertible) { if (tokenizerOptions.equals("")) { tokenizerOptions = "invertible=true"; } else if (!tokenizerOptions.matches("(^|.*,)invertible=true")) { tokenizerOptions += ",invertible=true"; } return PTBTokenizerFactory.newCoreLabelTokenizerFactory(tokenizerOptions); } else { return PTBTokenizerFactory.newWordTokenizerFactory(tokenizerOptions); } } else { return WhitespaceTokenizer.factory(); } }
// Assign a word tokenizer factory created by PTBTokenizer.PTBTokenizerFactory from tokenizerOptions.
tokenizerFactory = PTBTokenizer.PTBTokenizerFactory.newWordTokenizerFactory(tokenizerOptions);
protected static TokenizerFactory<? extends HasWord> chooseTokenizerFactory(boolean tokenize, String tokenizerFactory, String tokenizerOptions, boolean invertible) { if (tokenize && tokenizerFactory.trim().length() != 0) { //return (TokenizerFactory<? extends HasWord>) Class.forName(getTokenizerFactory()).newInstance(); try { @SuppressWarnings({"unchecked"}) Class<TokenizerFactory<? extends HasWord>> clazz = (Class<TokenizerFactory<? extends HasWord>>) Class.forName(tokenizerFactory.trim()); Method factoryMethod = clazz.getMethod("newTokenizerFactory"); @SuppressWarnings({"unchecked"}) TokenizerFactory<? extends HasWord> factory = (TokenizerFactory<? extends HasWord>) factoryMethod.invoke(tokenizerOptions); return factory; } catch (Exception e) { throw new RuntimeException("Could not load tokenizer factory", e); } } else if (tokenize) { if (invertible) { if (tokenizerOptions.equals("")) { tokenizerOptions = "invertible=true"; } else if (!tokenizerOptions.matches("(^|.*,)invertible=true")) { tokenizerOptions += ",invertible=true"; } return PTBTokenizerFactory.newCoreLabelTokenizerFactory(tokenizerOptions); } else { return PTBTokenizerFactory.newWordTokenizerFactory(tokenizerOptions); } } else { return WhitespaceTokenizer.factory(); } }
// NOTE(review): fragment; the enclosing try/catch and if/else headers are not visible here.
// First part (inside a handler that has an exception `e` in scope): reports the tokenizer-factory
// failure on stderr, prints the stack trace, falls back to a PTB word tokenizer factory built from
// the configured options, then reads the tag separator from config.
// Else branch: uses a PTB word tokenizer factory with empty options, SLASH_TAGS output style,
// and output verbosity switched off.
System.err.println("Error in tokenizer factory instantiation for class: " + config.getTokenizerFactory()); e.printStackTrace(); tokenizerFactory = PTBTokenizerFactory.newWordTokenizerFactory(config.getTokenizerOptions()); tagSeparator = config.getTagSeparator(); } else { tokenizerFactory = PTBTokenizerFactory.newWordTokenizerFactory(""); outputStyle = PlainTextDocumentReaderAndWriter.OutputStyle.SLASH_TAGS; outputVerbosity = false;
protected static TokenizerFactory<? extends HasWord> chooseTokenizerFactory(boolean tokenize, String tokenizerFactory, String tokenizerOptions, boolean invertible) { if (tokenize && tokenizerFactory.trim().length() != 0) { //return (TokenizerFactory<? extends HasWord>) Class.forName(getTokenizerFactory()).newInstance(); try { @SuppressWarnings({"unchecked"}) Class<TokenizerFactory<? extends HasWord>> clazz = (Class<TokenizerFactory<? extends HasWord>>) Class.forName(tokenizerFactory.trim()); Method factoryMethod = clazz.getMethod("newTokenizerFactory"); @SuppressWarnings({"unchecked"}) TokenizerFactory<? extends HasWord> factory = (TokenizerFactory<? extends HasWord>) factoryMethod.invoke(tokenizerOptions); return factory; } catch (Exception e) { throw new RuntimeException("Could not load tokenizer factory", e); } } else if (tokenize) { if (invertible) { if (tokenizerOptions.equals("")) { tokenizerOptions = "invertible=true"; } else if (!tokenizerOptions.matches("(^|.*,)invertible=true")) { tokenizerOptions += ",invertible=true"; } return PTBTokenizerFactory.newCoreLabelTokenizerFactory(tokenizerOptions); } else { return PTBTokenizerFactory.newWordTokenizerFactory(tokenizerOptions); } } else { return WhitespaceTokenizer.factory(); } }
protected static TokenizerFactory<? extends HasWord> chooseTokenizerFactory(boolean tokenize, String tokenizerFactory, String tokenizerOptions, boolean invertible) throws ClassNotFoundException, NoSuchMethodException, IllegalAccessException, java.lang.reflect.InvocationTargetException { if (tokenize && tokenizerFactory.trim().length() != 0) { //return (TokenizerFactory<? extends HasWord>) Class.forName(getTokenizerFactory()).newInstance(); @SuppressWarnings({"unchecked"}) Class<TokenizerFactory<? extends HasWord>> clazz = (Class<TokenizerFactory<? extends HasWord>>) Class.forName(tokenizerFactory.trim()); Method factoryMethod = clazz.getMethod("newTokenizerFactory"); @SuppressWarnings({"unchecked"}) TokenizerFactory<? extends HasWord> factory = (TokenizerFactory<? extends HasWord>) factoryMethod.invoke(tokenizerOptions); return factory; } else if (tokenize) { if (invertible) { if (tokenizerOptions.equals("")) { tokenizerOptions = "invertible=true"; } else if (!tokenizerOptions.matches("(^|.*,)invertible=true")) { tokenizerOptions += ",invertible=true"; } return PTBTokenizerFactory.newCoreLabelTokenizerFactory(tokenizerOptions); } else { return PTBTokenizerFactory.newWordTokenizerFactory(tokenizerOptions); } } else { return WhitespaceTokenizer.factory(); } }
protected TaggerWrapper(MaxentTagger tagger) { this.tagger = tagger; this.config = tagger.config; try { tokenizerFactory = chooseTokenizerFactory(config.getTokenize(), config.getTokenizerFactory(), config.getTokenizerOptions(), config.getTokenizerInvertible()); } catch (Exception e) { System.err.println("Error in tokenizer factory instantiation for class: " + config.getTokenizerFactory()); e.printStackTrace(); tokenizerFactory = PTBTokenizerFactory.newWordTokenizerFactory(config.getTokenizerOptions()); } outputStyle = OutputStyle.fromShortName(config.getOutputFormat()); outputVerbosity = config.getOutputVerbosity(); outputLemmas = config.getOutputLemmas(); morpha = (outputLemmas) ? new Morphology() : null; tokenize = config.getTokenize(); tagSeparator = config.getTagSeparator(); }
protected TaggerWrapper(MaxentTagger tagger) { this.tagger = tagger; this.config = tagger.config; try { tokenizerFactory = chooseTokenizerFactory(config.getTokenize(), config.getTokenizerFactory(), config.getTokenizerOptions(), config.getTokenizerInvertible()); } catch (Exception e) { log.info("Error in tokenizer factory instantiation for class: " + config.getTokenizerFactory()); e.printStackTrace(); tokenizerFactory = PTBTokenizerFactory.newWordTokenizerFactory(config.getTokenizerOptions()); } outputStyle = OutputStyle.fromShortName(config.getOutputFormat()); outputVerbosity = config.getOutputVerbosity(); outputLemmas = config.getOutputLemmas(); morpha = (outputLemmas) ? new Morphology() : null; tokenize = config.getTokenize(); // tagSeparator = config.getTagSeparator(); }
protected TaggerWrapper(MaxentTagger tagger) { this.tagger = tagger; this.config = tagger.config; try { tokenizerFactory = chooseTokenizerFactory(config.getTokenize(), config.getTokenizerFactory(), config.getTokenizerOptions(), config.getTokenizerInvertible()); } catch (Exception e) { log.info("Error in tokenizer factory instantiation for class: " + config.getTokenizerFactory()); e.printStackTrace(); tokenizerFactory = PTBTokenizerFactory.newWordTokenizerFactory(config.getTokenizerOptions()); } outputStyle = OutputStyle.fromShortName(config.getOutputFormat()); outputVerbosity = config.getOutputVerbosity(); outputLemmas = config.getOutputLemmas(); morpha = (outputLemmas) ? new Morphology() : null; tokenize = config.getTokenize(); // tagSeparator = config.getTagSeparator(); }