protected static TokenizerFactory<? extends HasWord> chooseTokenizerFactory(boolean tokenize, String tokenizerFactory, String tokenizerOptions, boolean invertible) { if (tokenize && tokenizerFactory.trim().length() != 0) { //return (TokenizerFactory<? extends HasWord>) Class.forName(getTokenizerFactory()).newInstance(); try { @SuppressWarnings({"unchecked"}) Class<TokenizerFactory<? extends HasWord>> clazz = (Class<TokenizerFactory<? extends HasWord>>) Class.forName(tokenizerFactory.trim()); Method factoryMethod = clazz.getMethod("newTokenizerFactory"); @SuppressWarnings({"unchecked"}) TokenizerFactory<? extends HasWord> factory = (TokenizerFactory<? extends HasWord>) factoryMethod.invoke(tokenizerOptions); return factory; } catch (Exception e) { throw new RuntimeException("Could not load tokenizer factory", e); } } else if (tokenize) { if (invertible) { if (tokenizerOptions.equals("")) { tokenizerOptions = "invertible=true"; } else if (!tokenizerOptions.matches("(^|.*,)invertible=true")) { tokenizerOptions += ",invertible=true"; } return PTBTokenizerFactory.newCoreLabelTokenizerFactory(tokenizerOptions); } else { return PTBTokenizerFactory.newWordTokenizerFactory(tokenizerOptions); } } else { return WhitespaceTokenizer.factory(); } }
/**
 * Initializes this object from {@code flags}, building the tokenizer factory first.
 *
 * <p>The option string always starts with {@code tokenizeNLs=false,invertible=true}; when
 * {@code flags.tokenizerOptions} is non-null it is appended after a comma. If
 * {@code flags.tokenizerFactory} is non-null, that class is loaded reflectively and its public
 * static {@code newCoreLabelTokenizerFactory(String)} method is called with the option string;
 * otherwise {@code PTBTokenizer.PTBTokenizerFactory.newCoreLabelTokenizerFactory} is used. The
 * resulting factory is handed to {@code init(flags, factory)}.
 *
 * @param flags configuration supplying {@code tokenizerOptions} and {@code tokenizerFactory}
 * @throws RuntimeException wrapping any exception raised while loading or invoking the
 *     reflectively named factory
 */
@Override
public void init(SeqClassifierFlags flags) {
  final String extraOptions = flags.tokenizerOptions;
  final String options = (extraOptions == null)
      ? "tokenizeNLs=false,invertible=true"
      : "tokenizeNLs=false,invertible=true" + ',' + extraOptions;

  final String factoryClassName = flags.tokenizerFactory;
  TokenizerFactory<IN> factory;
  if (factoryClassName == null) {
    // No custom factory configured: use the PTB tokenizer factory.
    factory = ErasureUtils.uncheckedCast(PTBTokenizer.PTBTokenizerFactory.newCoreLabelTokenizerFactory(options));
  } else {
    try {
      Class<TokenizerFactory<? extends HasWord>> factoryClass = ErasureUtils.uncheckedCast(Class.forName(factoryClassName));
      Method creator = factoryClass.getMethod("newCoreLabelTokenizerFactory", String.class);
      // Null receiver: the creator method is invoked as a static method.
      factory = ErasureUtils.uncheckedCast(creator.invoke(null, options));
    } catch (Exception e) {
      throw new RuntimeException(e);
    }
  }

  init(flags, factory);
}
// Wrap the string `text` in a StringReader and hand it to a new DocumentPreprocessor.
DocumentPreprocessor dp = new DocumentPreprocessor(new StringReader(text));
// Build a tokenizer factory via newCoreLabelTokenizerFactory with three options explicitly set
// to false: ptb3Escaping, normalizeParentheses and escapeForwardSlashAsterisk.
// NOTE(review): presumably this keeps token text close to the raw input — confirm against the
// PTBTokenizer option documentation.
PTBTokenizerFactory<CoreLabel> tokenizerFactory = PTBTokenizerFactory
    .newCoreLabelTokenizerFactory("ptb3Escaping=false,normalizeParentheses=false,escapeForwardSlashAsterisk=false");
// Install the configured factory on the preprocessor.
dp.setTokenizerFactory(tokenizerFactory);
// Create a DocumentPreprocessor that reads from the in-memory string `text`.
DocumentPreprocessor dp = new DocumentPreprocessor(new StringReader(text));
// The factory is created with ptb3Escaping, normalizeParentheses and
// escapeForwardSlashAsterisk all set to false in its option string.
// NOTE(review): the exact effect of each option is defined by PTBTokenizer, not visible here —
// verify before changing the option list.
PTBTokenizerFactory<CoreLabel> tokenizerFactory = PTBTokenizerFactory
    .newCoreLabelTokenizerFactory("ptb3Escaping=false,normalizeParentheses=false,escapeForwardSlashAsterisk=false");
// Make the preprocessor use this factory.
dp.setTokenizerFactory(tokenizerFactory);
// `text` is exposed to the DocumentPreprocessor through a StringReader.
DocumentPreprocessor dp = new DocumentPreprocessor(new StringReader(text));
// Tokenizer factory obtained from newCoreLabelTokenizerFactory; the option string switches off
// ptb3Escaping, normalizeParentheses and escapeForwardSlashAsterisk.
// NOTE(review): assumed to disable PTB-style rewriting of tokens — TODO confirm in the
// PTBTokenizer documentation.
PTBTokenizerFactory<CoreLabel> tokenizerFactory = PTBTokenizerFactory
    .newCoreLabelTokenizerFactory("ptb3Escaping=false,normalizeParentheses=false,escapeForwardSlashAsterisk=false");
// Set the factory on the preprocessor created above.
dp.setTokenizerFactory(tokenizerFactory);
protected static TokenizerFactory<? extends HasWord> chooseTokenizerFactory(boolean tokenize, String tokenizerFactory, String tokenizerOptions, boolean invertible) { if (tokenize && tokenizerFactory.trim().length() != 0) { //return (TokenizerFactory<? extends HasWord>) Class.forName(getTokenizerFactory()).newInstance(); try { @SuppressWarnings({"unchecked"}) Class<TokenizerFactory<? extends HasWord>> clazz = (Class<TokenizerFactory<? extends HasWord>>) Class.forName(tokenizerFactory.trim()); Method factoryMethod = clazz.getMethod("newTokenizerFactory"); @SuppressWarnings({"unchecked"}) TokenizerFactory<? extends HasWord> factory = (TokenizerFactory<? extends HasWord>) factoryMethod.invoke(tokenizerOptions); return factory; } catch (Exception e) { throw new RuntimeException("Could not load tokenizer factory", e); } } else if (tokenize) { if (invertible) { if (tokenizerOptions.equals("")) { tokenizerOptions = "invertible=true"; } else if (!tokenizerOptions.matches("(^|.*,)invertible=true")) { tokenizerOptions += ",invertible=true"; } return PTBTokenizerFactory.newCoreLabelTokenizerFactory(tokenizerOptions); } else { return PTBTokenizerFactory.newWordTokenizerFactory(tokenizerOptions); } } else { return WhitespaceTokenizer.factory(); } }
protected static TokenizerFactory<? extends HasWord> chooseTokenizerFactory(boolean tokenize, String tokenizerFactory, String tokenizerOptions, boolean invertible) throws ClassNotFoundException, NoSuchMethodException, IllegalAccessException, java.lang.reflect.InvocationTargetException { if (tokenize && tokenizerFactory.trim().length() != 0) { //return (TokenizerFactory<? extends HasWord>) Class.forName(getTokenizerFactory()).newInstance(); @SuppressWarnings({"unchecked"}) Class<TokenizerFactory<? extends HasWord>> clazz = (Class<TokenizerFactory<? extends HasWord>>) Class.forName(tokenizerFactory.trim()); Method factoryMethod = clazz.getMethod("newTokenizerFactory"); @SuppressWarnings({"unchecked"}) TokenizerFactory<? extends HasWord> factory = (TokenizerFactory<? extends HasWord>) factoryMethod.invoke(tokenizerOptions); return factory; } else if (tokenize) { if (invertible) { if (tokenizerOptions.equals("")) { tokenizerOptions = "invertible=true"; } else if (!tokenizerOptions.matches("(^|.*,)invertible=true")) { tokenizerOptions += ",invertible=true"; } return PTBTokenizerFactory.newCoreLabelTokenizerFactory(tokenizerOptions); } else { return PTBTokenizerFactory.newWordTokenizerFactory(tokenizerOptions); } } else { return WhitespaceTokenizer.factory(); } }
protected static TokenizerFactory<? extends HasWord> chooseTokenizerFactory(boolean tokenize, String tokenizerFactory, String tokenizerOptions, boolean invertible) { if (tokenize && tokenizerFactory.trim().length() != 0) { //return (TokenizerFactory<? extends HasWord>) Class.forName(getTokenizerFactory()).newInstance(); try { @SuppressWarnings({"unchecked"}) Class<TokenizerFactory<? extends HasWord>> clazz = (Class<TokenizerFactory<? extends HasWord>>) Class.forName(tokenizerFactory.trim()); Method factoryMethod = clazz.getMethod("newTokenizerFactory"); @SuppressWarnings({"unchecked"}) TokenizerFactory<? extends HasWord> factory = (TokenizerFactory<? extends HasWord>) factoryMethod.invoke(tokenizerOptions); return factory; } catch (Exception e) { throw new RuntimeException("Could not load tokenizer factory", e); } } else if (tokenize) { if (invertible) { if (tokenizerOptions.equals("")) { tokenizerOptions = "invertible=true"; } else if (!tokenizerOptions.matches("(^|.*,)invertible=true")) { tokenizerOptions += ",invertible=true"; } return PTBTokenizerFactory.newCoreLabelTokenizerFactory(tokenizerOptions); } else { return PTBTokenizerFactory.newWordTokenizerFactory(tokenizerOptions); } } else { return WhitespaceTokenizer.factory(); } }
protected static TokenizerFactory<? extends HasWord> chooseTokenizerFactory(boolean tokenize, String tokenizerFactory, String tokenizerOptions, boolean invertible) { if (tokenize && tokenizerFactory.trim().length() != 0) { //return (TokenizerFactory<? extends HasWord>) Class.forName(getTokenizerFactory()).newInstance(); try { @SuppressWarnings({"unchecked"}) Class<TokenizerFactory<? extends HasWord>> clazz = (Class<TokenizerFactory<? extends HasWord>>) Class.forName(tokenizerFactory.trim()); Method factoryMethod = clazz.getMethod("newTokenizerFactory"); @SuppressWarnings({"unchecked"}) TokenizerFactory<? extends HasWord> factory = (TokenizerFactory<? extends HasWord>) factoryMethod.invoke(tokenizerOptions); return factory; } catch (Exception e) { throw new RuntimeException("Could not load tokenizer factory", e); } } else if (tokenize) { if (invertible) { if (tokenizerOptions.equals("")) { tokenizerOptions = "invertible=true"; } else if (!tokenizerOptions.matches("(^|.*,)invertible=true")) { tokenizerOptions += ",invertible=true"; } return PTBTokenizerFactory.newCoreLabelTokenizerFactory(tokenizerOptions); } else { return PTBTokenizerFactory.newWordTokenizerFactory(tokenizerOptions); } } else { return WhitespaceTokenizer.factory(); } }
/**
 * Initializes this object from {@code flags}, building the tokenizer factory first.
 *
 * <p>The option string always starts with {@code tokenizeNLs=false,invertible=true}; when
 * {@code flags.tokenizerOptions} is non-null it is appended after a comma. If
 * {@code flags.tokenizerFactory} is non-null, that class is loaded reflectively and its public
 * static {@code newCoreLabelTokenizerFactory(String)} method is called with the option string;
 * otherwise {@code PTBTokenizer.PTBTokenizerFactory.newCoreLabelTokenizerFactory} is used. The
 * resulting factory is handed to {@code init(flags, factory)}.
 *
 * @param flags configuration supplying {@code tokenizerOptions} and {@code tokenizerFactory}
 * @throws RuntimeException wrapping any exception raised while loading or invoking the
 *     reflectively named factory
 */
public void init(SeqClassifierFlags flags) {
  final String extraOptions = flags.tokenizerOptions;
  final String options = (extraOptions == null)
      ? "tokenizeNLs=false,invertible=true"
      : "tokenizeNLs=false,invertible=true" + "," + extraOptions;

  final String factoryClassName = flags.tokenizerFactory;
  TokenizerFactory<IN> factory;
  if (factoryClassName == null) {
    // No custom factory configured: use the PTB tokenizer factory.
    factory = ErasureUtils.uncheckedCast(PTBTokenizer.PTBTokenizerFactory.newCoreLabelTokenizerFactory(options));
  } else {
    try {
      Class<TokenizerFactory<? extends HasWord>> factoryClass = ErasureUtils.uncheckedCast(Class.forName(factoryClassName));
      Method creator = factoryClass.getMethod("newCoreLabelTokenizerFactory", String.class);
      // Null receiver: the creator method is invoked as a static method.
      factory = ErasureUtils.uncheckedCast(creator.invoke(null, options));
    } catch (Exception e) {
      throw new RuntimeException(e);
    }
  }

  init(flags, factory);
}
/**
 * Initializes this object from {@code flags}, building the tokenizer factory first.
 *
 * <p>The option string always starts with {@code tokenizeNLs=false,invertible=true}; when
 * {@code flags.tokenizerOptions} is non-null it is appended after a comma. If
 * {@code flags.tokenizerFactory} is non-null, that class is loaded reflectively and its public
 * static {@code newCoreLabelTokenizerFactory(String)} method is called with the option string;
 * otherwise {@code PTBTokenizer.PTBTokenizerFactory.newCoreLabelTokenizerFactory} is used. The
 * resulting factory is handed to {@code init(flags, factory)}.
 *
 * @param flags configuration supplying {@code tokenizerOptions} and {@code tokenizerFactory}
 * @throws RuntimeException wrapping any exception raised while loading or invoking the
 *     reflectively named factory
 */
@Override
public void init(SeqClassifierFlags flags) {
  final String extraOptions = flags.tokenizerOptions;
  final String options = (extraOptions == null)
      ? "tokenizeNLs=false,invertible=true"
      : "tokenizeNLs=false,invertible=true" + ',' + extraOptions;

  final String factoryClassName = flags.tokenizerFactory;
  TokenizerFactory<IN> factory;
  if (factoryClassName == null) {
    // No custom factory configured: use the PTB tokenizer factory.
    factory = ErasureUtils.uncheckedCast(PTBTokenizer.PTBTokenizerFactory.newCoreLabelTokenizerFactory(options));
  } else {
    try {
      Class<TokenizerFactory<? extends HasWord>> factoryClass = ErasureUtils.uncheckedCast(Class.forName(factoryClassName));
      Method creator = factoryClass.getMethod("newCoreLabelTokenizerFactory", String.class);
      // Null receiver: the creator method is invoked as a static method.
      factory = ErasureUtils.uncheckedCast(creator.invoke(null, options));
    } catch (Exception e) {
      throw new RuntimeException(e);
    }
  }

  init(flags, factory);
}
private List<Span> computeSentenceList(Tuple inputTuple) { String inputText = inputTuple.<IField>getField(predicate.getInputAttributeName()).getValue().toString(); Reader reader = new StringReader(inputText); DocumentPreprocessor documentPreprocessor = new DocumentPreprocessor(reader); documentPreprocessor.setTokenizerFactory(PTBTokenizer.PTBTokenizerFactory.newCoreLabelTokenizerFactory("ptb3Escaping=false")); List<Span> sentenceList = new ArrayList<Span>(); int start = 0; int end = 0; String key=PropertyNameConstants.NLP_SPLIT_KEY; String attributeName = predicate.getInputAttributeName(); for (List<HasWord> sentence : documentPreprocessor) { String sentenceText = Sentence.listToString(sentence); //Make span end = start + sentenceText.length(); Span span = new Span(attributeName, start, end, key, sentenceText); sentenceList.add(span); start = end + 1; } return sentenceList; }
/**
 * Initializes this object from {@code flags}, building the tokenizer factory first.
 *
 * <p>The option string always starts with {@code tokenizeNLs=false,invertible=true}; when
 * {@code flags.tokenizerOptions} is non-null it is appended after a comma. If
 * {@code flags.tokenizerFactory} is non-null, that class is loaded reflectively and its public
 * static {@code newCoreLabelTokenizerFactory(String)} method is called with the option string;
 * otherwise {@code PTBTokenizer.PTBTokenizerFactory.newCoreLabelTokenizerFactory} is used. The
 * resulting factory is handed to {@code init(flags, factory)}.
 *
 * @param flags configuration supplying {@code tokenizerOptions} and {@code tokenizerFactory}
 * @throws RuntimeException wrapping any exception raised while loading or invoking the
 *     reflectively named factory
 */
@Override
public void init(SeqClassifierFlags flags) {
  final String extraOptions = flags.tokenizerOptions;
  final String options = (extraOptions == null)
      ? "tokenizeNLs=false,invertible=true"
      : "tokenizeNLs=false,invertible=true" + ',' + extraOptions;

  final String factoryClassName = flags.tokenizerFactory;
  TokenizerFactory<IN> factory;
  if (factoryClassName == null) {
    // No custom factory configured: use the PTB tokenizer factory.
    factory = ErasureUtils.uncheckedCast(PTBTokenizer.PTBTokenizerFactory.newCoreLabelTokenizerFactory(options));
  } else {
    try {
      Class<TokenizerFactory<? extends HasWord>> factoryClass = ErasureUtils.uncheckedCast(Class.forName(factoryClassName));
      Method creator = factoryClass.getMethod("newCoreLabelTokenizerFactory", String.class);
      // Null receiver: the creator method is invoked as a static method.
      factory = ErasureUtils.uncheckedCast(creator.invoke(null, options));
    } catch (Exception e) {
      throw new RuntimeException(e);
    }
  }

  init(flags, factory);
}