@Override
public void missclassified(TokenSample reference, TokenSample prediction) {
  printError(reference, prediction);
}
public String getHelp() { return "Usage: " + CLI.CMD + " " + getName() + " < sentences"; }
public String getHelp() { return "Usage: " + CLI.CMD + " " + getName() + " model < sentences"; }
public void run(String format, String[] args) {
  super.run(format, args);

  mlParams = CmdLineUtil.loadTrainingParameters(params.getParams(), false);
  if (mlParams == null) {
    mlParams = ModelUtil.createDefaultTrainingParameters();
  }

  TokenizerEvaluationMonitor listener = null;
  if (params.getMisclassified()) {
    listener = new TokenEvaluationErrorListener();
  }

  TokenizerCrossValidator validator;
  try {
    Dictionary dict = TokenizerTrainerTool.loadDict(params.getAbbDict());
    TokenizerFactory tokFactory = TokenizerFactory.create(
        params.getFactory(), params.getLang(), dict, params.getAlphaNumOpt(), null);
    validator = new opennlp.tools.tokenize.TokenizerCrossValidator(mlParams, tokFactory, listener);
    validator.evaluate(sampleStream, params.getFolds());
  } catch (IOException e) {
    throw createTerminationIOException(e);
  }

  System.out.println(validator.getFMeasure());
}
public void run(String format, String[] args) {
  super.run(format, args);

  mlParams = CmdLineUtil.loadTrainingParameters(params.getParams(), false);
  // reject an unusable parameters file (validity check via TrainerFactory assumed here)
  if (mlParams != null && !TrainerFactory.isValid(mlParams)) {
    throw new TerminateToolException(1,
        "Training parameters file '" + params.getParams() + "' is invalid!");
  }
  if (mlParams == null) {
    mlParams = ModelUtil.createDefaultTrainingParameters();
  }

  File modelOutFile = params.getModel();
  CmdLineUtil.checkOutputFile("tokenizer model", modelOutFile);

  TokenizerModel model;
  try {
    Dictionary dict = loadDict(params.getAbbDict());
    TokenizerFactory tokFactory = TokenizerFactory.create(
        params.getFactory(), params.getLang(), dict, params.getAlphaNumOpt(), null);
    model = opennlp.tools.tokenize.TokenizerME.train(sampleStream, tokFactory, mlParams);
  } catch (IOException e) {
    throw createTerminationIOException(e);
  }

  CmdLineUtil.writeModel("tokenizer", modelOutFile, model);
}
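// A programmatic counterpart to the trainer tool above, as a hedged sketch: the training-data
// path "train.tok", the language code "en", and the helper name trainTokenizer are illustrative
// assumptions; the TokenizerFactory.create and TokenizerME.train calls mirror the ones used above.
static TokenizerModel trainTokenizer(Dictionary abbDict) throws IOException {
  ObjectStream<String> lineStream = new PlainTextByLineStream(
      new MarkableFileInputStreamFactory(new File("train.tok")), StandardCharsets.UTF_8);
  ObjectStream<TokenSample> samples = new TokenSampleStream(lineStream);

  // null subclass name selects the default factory; true enables the alphanumeric optimization
  TokenizerFactory factory = TokenizerFactory.create(null, "en", abbDict, true, null);
  return TokenizerME.train(samples, factory, TrainingParameters.defaultParams());
}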
public void run(String[] args) {
  if (args.length != 1) {
    System.out.println(getHelp());
  } else {
    TokenizerModel model = new TokenizerModelLoader().load(new File(args[0]));
    CommandLineTokenizer tokenizer =
        new CommandLineTokenizer(new opennlp.tools.tokenize.TokenizerME(model));
    tokenizer.process();
  }
}
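// A minimal usage sketch of what the command-line wrapper above delegates to: load a
// TokenizerModel and tokenize one sentence with TokenizerME. The model path, the example
// sentence, and the helper name tokenizeExample are illustrative assumptions.
static void tokenizeExample() throws IOException {
  try (InputStream modelIn = new FileInputStream("en-token.bin")) {
    TokenizerModel model = new TokenizerModel(modelIn);
    Tokenizer tokenizer = new TokenizerME(model);
    String[] tokens = tokenizer.tokenize("Pierre Vinken, 61 years old, will join the board.");
    System.out.println(String.join(" | ", tokens));
  }
}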
public void run(String format, String[] args) {
  super.run(format, args);

  TokenizerModel model = new TokenizerModelLoader().load(params.getModel());

  TokenizerEvaluationMonitor misclassifiedListener = null;
  if (params.getMisclassified()) {
    misclassifiedListener = new TokenEvaluationErrorListener();
  }

  TokenizerEvaluator evaluator = new TokenizerEvaluator(
      new opennlp.tools.tokenize.TokenizerME(model), misclassifiedListener);
  try {
    evaluator.evaluate(sampleStream);
  } catch (IOException e) {
    throw new TerminateToolException(-1, "IO error while reading test data: " + e.getMessage(), e);
  }

  System.out.println(evaluator.getFMeasure());
}
public void run(String[] args) {
  if (args.length != 0) {
    System.out.println(getHelp());
  } else {
    CommandLineTokenizer tokenizer =
        new CommandLineTokenizer(opennlp.tools.tokenize.SimpleTokenizer.INSTANCE);
    tokenizer.process();
  }
}
public ObjectStream<NameSample> create(String[] args) {
  Parameters params = ArgumentParser.parse(args, Parameters.class);

  TokenizerModel tokenizerModel = new TokenizerModelLoader().load(params.getTokenizerModel());
  Tokenizer tokenizer = new TokenizerME(tokenizerModel);

  ObjectStream<String> mucDocStream = new FileToStringSampleStream(
      new DirectorySampleStream(params.getData(),
          file -> StringUtil.toLowerCase(file.getName()).endsWith(".sgm"), false),
      StandardCharsets.UTF_8);

  return new MucNameSampleStream(tokenizer, mucDocStream);
}
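// A consumption sketch for the ObjectStream<NameSample> created above: read() returns one
// sample at a time until the stream is exhausted (null). The helper name printNames is an
// illustrative assumption; the 'samples' parameter stands in for the stream the factory returns.
static void printNames(ObjectStream<NameSample> samples) throws IOException {
  NameSample sample;
  while ((sample = samples.read()) != null) {
    String[] tokens = sample.getSentence();
    for (Span name : sample.getNames()) {
      // a Span covers token indices [getStart(), getEnd()) and carries the entity type
      System.out.println(name.getType() + ": "
          + String.join(" ", Arrays.copyOfRange(tokens, name.getStart(), name.getEnd())));
    }
  }
  samples.close();
}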
public void run(String[] args) {
  if (args.length != 1) {
    System.out.println(getHelp());
  } else {
    try {
      Detokenizer detokenizer = new DictionaryDetokenizer(
          new DetokenizationDictionaryLoader().load(new File(args[0])));
      // remainder of the tool: read tokenized lines from standard input and print the
      // detokenized output
    } catch (IOException e) {
      CmdLineUtil.handleStdinIoError(e);
    }
  }
}
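// A hedged sketch of the loop such a tool typically runs after the setup above: whitespace-split
// each already-tokenized input line and join it back with the dictionary-driven detokenizer.
// Reading System.in through a BufferedReader and the helper name detokenizeLines are
// illustrative assumptions, not the tool's exact wiring.
static void detokenizeLines(Detokenizer detokenizer) throws IOException {
  BufferedReader in = new BufferedReader(new InputStreamReader(System.in, StandardCharsets.UTF_8));
  String line;
  while ((line = in.readLine()) != null) {
    String[] tokens = WhitespaceTokenizer.INSTANCE.tokenize(line);
    // detokenize(tokens, splitMarker): a null split marker simply joins tokens per the dictionary rules
    System.out.println(detokenizer.detokenize(tokens, null));
  }
}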
@Test
public void testPositive() throws InvalidFormatException {
  OutputStream stream = new ByteArrayOutputStream();
  TokenizerEvaluationMonitor listener = new TokenEvaluationErrorListener(stream);

  TokenizerEvaluator eval = new TokenizerEvaluator(
      new DummyTokenizer(TokenSampleTest.createGoldSample()), listener);

  eval.evaluateSample(TokenSampleTest.createGoldSample());
  Assert.assertEquals(1.0, eval.getFMeasure().getFMeasure(), 0.0);
  Assert.assertEquals(0, stream.toString().length());
}
public String getHelp() { return "Usage: " + CLI.CMD + " " + getName() + " detokenizerDictionary"; }
@Override
public void missclassified(TokenSample reference, TokenSample prediction) {
  printError(reference.getTokenSpans(), prediction.getTokenSpans(),
      reference, prediction, reference.getText());
}
@Test
public void testNegative() throws InvalidFormatException {
  OutputStream stream = new ByteArrayOutputStream();
  DetokenEvaluationErrorListener listener = new DetokenEvaluationErrorListener(stream);

  DetokenizerEvaluator eval = new DetokenizerEvaluator(
      new DummyDetokenizer(TokenSampleTest.createGoldSample()), listener);

  eval.evaluateSample(TokenSampleTest.createPredSilverSample());
  Assert.assertEquals(-1.0d, eval.getFMeasure().getFMeasure(), .1d);
  Assert.assertNotSame(0, stream.toString().length());
}
String tokenizerModelName = CmdLineUtil.getParameter("-tk", args);
if (tokenizerModelName != null) {
  TokenizerModel tokenizerModel = new TokenizerModelLoader().load(new File(tokenizerModelName));
  tokenizer = new TokenizerME(tokenizerModel);
}
@Test
public void testNegative() throws InvalidFormatException {
  OutputStream stream = new ByteArrayOutputStream();
  TokenizerEvaluationMonitor listener = new TokenEvaluationErrorListener(stream);

  TokenizerEvaluator eval = new TokenizerEvaluator(
      new DummyTokenizer(TokenSampleTest.createGoldSample()), listener);

  eval.evaluateSample(TokenSampleTest.createPredSample());
  Assert.assertEquals(.5d, eval.getFMeasure().getFMeasure(), .1d);
  Assert.assertNotSame(0, stream.toString().length());
}
public String getHelp() { return "Usage: " + CLI.CMD + " " + getName() + " detokenizerDictionary"; }
@Test
public void testPositive() throws InvalidFormatException {
  OutputStream stream = new ByteArrayOutputStream();
  DetokenEvaluationErrorListener listener = new DetokenEvaluationErrorListener(stream);

  DetokenizerEvaluator eval = new DetokenizerEvaluator(
      new DummyDetokenizer(TokenSampleTest.createGoldSample()), listener);

  eval.evaluateSample(TokenSampleTest.createGoldSample());
  Assert.assertEquals(1.0, eval.getFMeasure().getFMeasure(), 0.0);
  Assert.assertEquals(0, stream.toString().length());
}