/**
 * Creates a cross validator backed by a default {@link SentenceDetectorFactory},
 * built as {@code new SentenceDetectorFactory(languageCode, true, null, null)}:
 * token-end enabled, no abbreviation dictionary and no explicit
 * end-of-sentence characters.
 *
 * @param languageCode the language code, also handed to the default factory
 * @param params the training parameters forwarded to the delegate constructor
 *
 * @deprecated Use
 * {@link #SDCrossValidator(String, TrainingParameters,
 * SentenceDetectorFactory, SentenceDetectorEvaluationMonitor...)}
 * and pass in a {@link SentenceDetectorFactory}.
 */
@Deprecated
public SDCrossValidator(String languageCode, TrainingParameters params) {
  this(languageCode, params, new SentenceDetectorFactory(languageCode, true, null, null));
}
/**
 * Creates a sentence model by wrapping the loose configuration values into a
 * new {@link SentenceDetectorFactory} and delegating to the factory-based
 * constructor.
 *
 * TODO: was added in 1.5.3 -> remove
 *
 * @param languageCode the language code, also handed to the created factory
 * @param sentModel the maxent model forwarded to the delegate constructor
 * @param useTokenEnd forwarded to the created factory
 * @param abbreviations the abbreviation dictionary forwarded to the created factory
 * @param eosCharacters the end-of-sentence characters forwarded to the created factory
 * @param manifestInfoEntries the manifest entries forwarded to the delegate constructor
 *
 * @deprecated Use
 * {@link #SentenceModel(String, MaxentModel, Map, SentenceDetectorFactory)}
 * instead and pass in a {@link SentenceDetectorFactory}
 */
@Deprecated
public SentenceModel(String languageCode, MaxentModel sentModel,
    boolean useTokenEnd, Dictionary abbreviations, char[] eosCharacters,
    Map<String, String> manifestInfoEntries) {
  this(languageCode, sentModel, manifestInfoEntries,
      new SentenceDetectorFactory(languageCode, useTokenEnd, abbreviations, eosCharacters));
}
/**
 * Creates a cross validator with evaluation listeners, backed by a default
 * {@link SentenceDetectorFactory} built as
 * {@code new SentenceDetectorFactory(languageCode, true, null, null)}.
 *
 * @param languageCode the language code, also handed to the default factory
 * @param params the training parameters forwarded to the delegate constructor
 * @param listeners the evaluation monitors forwarded to the delegate constructor
 *
 * @deprecated use
 * {@link #SDCrossValidator(String, TrainingParameters, SentenceDetectorFactory,
 * SentenceDetectorEvaluationMonitor...)}
 * instead and pass in a {@link SentenceDetectorFactory}.
 */
@Deprecated
public SDCrossValidator(String languageCode, TrainingParameters params,
    SentenceDetectorEvaluationMonitor... listeners) {
  this(languageCode, params,
      new SentenceDetectorFactory(languageCode, true, null, null), listeners);
}
/**
 * Trains a sentence model using a {@link SentenceDetectorFactory} assembled
 * from the given language code, {@code useTokenEnd} flag and abbreviation
 * dictionary. The factory's end-of-sentence characters are passed as
 * {@code null}.
 *
 * @param languageCode the language code, also handed to the created factory
 * @param samples the training samples forwarded to the delegate
 * @param useTokenEnd forwarded to the created factory
 * @param abbreviations the abbreviation dictionary forwarded to the created factory
 * @param mlParams the training parameters forwarded to the delegate
 * @return the model returned by the factory-based {@code train} overload
 * @throws IOException as declared by the overload this method delegates to
 *
 * @deprecated Use
 * {@link #train(String, ObjectStream, SentenceDetectorFactory, TrainingParameters)}
 * and pass in a {@link SentenceDetectorFactory}.
 */
@Deprecated
public static SentenceModel train(String languageCode,
    ObjectStream<SentenceSample> samples, boolean useTokenEnd,
    Dictionary abbreviations, TrainingParameters mlParams) throws IOException {
  SentenceDetectorFactory sdFactory = new SentenceDetectorFactory(
      languageCode, useTokenEnd, abbreviations, null);
  return train(languageCode, samples, sdFactory, mlParams);
}
public static SentenceDetectorFactory create(String subclassName, String languageCode, boolean useTokenEnd, Dictionary abbreviationDictionary, char[] eosCharacters) throws InvalidFormatException { if (subclassName == null) { // will create the default factory return new SentenceDetectorFactory(languageCode, useTokenEnd, abbreviationDictionary, eosCharacters); } try { SentenceDetectorFactory theFactory = ExtensionLoader .instantiateExtension(SentenceDetectorFactory.class, subclassName); theFactory.init(languageCode, useTokenEnd, abbreviationDictionary, eosCharacters); return theFactory; } catch (Exception e) { String msg = "Could not instantiate the " + subclassName + ". The initialization throw an exception."; System.err.println(msg); e.printStackTrace(); throw new InvalidFormatException(msg, e); } }
/**
 * Creates a cross validator with evaluation listeners, backed by a default
 * {@link SentenceDetectorFactory} built as
 * {@code new SentenceDetectorFactory(languageCode, true, null, null)}.
 *
 * @param languageCode the language code, also handed to the default factory
 * @param params the training parameters forwarded to the delegate constructor
 * @param listeners the evaluation monitors forwarded to the delegate constructor
 *
 * @deprecated use
 * {@link #SDCrossValidator(String, TrainingParameters, SentenceDetectorFactory,
 * SentenceDetectorEvaluationMonitor...)}
 * instead and pass in a {@link SentenceDetectorFactory}.
 */
@Deprecated
public SDCrossValidator(String languageCode, TrainingParameters params,
    SentenceDetectorEvaluationMonitor... listeners) {
  this(languageCode, params,
      new SentenceDetectorFactory(languageCode, true, null, null), listeners);
}
/**
 * Creates a cross validator backed by a default {@link SentenceDetectorFactory},
 * built as {@code new SentenceDetectorFactory(languageCode, true, null, null)}:
 * token-end enabled, no abbreviation dictionary and no explicit
 * end-of-sentence characters.
 *
 * @param languageCode the language code, also handed to the default factory
 * @param params the training parameters forwarded to the delegate constructor
 *
 * @deprecated Use
 * {@link #SDCrossValidator(String, TrainingParameters,
 * SentenceDetectorFactory, SentenceDetectorEvaluationMonitor...)}
 * and pass in a {@link SentenceDetectorFactory}.
 */
@Deprecated
public SDCrossValidator(String languageCode, TrainingParameters params) {
  this(languageCode, params, new SentenceDetectorFactory(languageCode, true, null, null));
}
/**
 * Creates a sentence model by wrapping the loose configuration values into a
 * new {@link SentenceDetectorFactory} and delegating to the factory-based
 * constructor.
 *
 * TODO: was added in 1.5.3 -> remove
 *
 * @param languageCode the language code, also handed to the created factory
 * @param sentModel the maxent model forwarded to the delegate constructor
 * @param useTokenEnd forwarded to the created factory
 * @param abbreviations the abbreviation dictionary forwarded to the created factory
 * @param eosCharacters the end-of-sentence characters forwarded to the created factory
 * @param manifestInfoEntries the manifest entries forwarded to the delegate constructor
 *
 * @deprecated Use
 * {@link #SentenceModel(String, MaxentModel, Map, SentenceDetectorFactory)}
 * instead and pass in a {@link SentenceDetectorFactory}
 */
@Deprecated
public SentenceModel(String languageCode, MaxentModel sentModel,
    boolean useTokenEnd, Dictionary abbreviations, char[] eosCharacters,
    Map<String, String> manifestInfoEntries) {
  this(languageCode, sentModel, manifestInfoEntries,
      new SentenceDetectorFactory(languageCode, useTokenEnd, abbreviations, eosCharacters));
}
/**
 * Creates a cross validator with evaluation listeners, backed by a default
 * {@link SentenceDetectorFactory} built as
 * {@code new SentenceDetectorFactory(languageCode, true, null, null)}.
 *
 * @param languageCode the language code, also handed to the default factory
 * @param params the training parameters forwarded to the delegate constructor
 * @param listeners the evaluation monitors forwarded to the delegate constructor
 *
 * @deprecated use
 * {@link #SDCrossValidator(String, TrainingParameters, SentenceDetectorFactory,
 * SentenceDetectorEvaluationMonitor...)}
 * instead and pass in a {@link SentenceDetectorFactory}.
 */
@Deprecated
public SDCrossValidator(String languageCode, TrainingParameters params,
    SentenceDetectorEvaluationMonitor... listeners) {
  this(languageCode, params,
      new SentenceDetectorFactory(languageCode, true, null, null), listeners);
}
/**
 * Creates a cross validator backed by a default {@link SentenceDetectorFactory},
 * built as {@code new SentenceDetectorFactory(languageCode, true, null, null)}:
 * token-end enabled, no abbreviation dictionary and no explicit
 * end-of-sentence characters.
 *
 * @param languageCode the language code, also handed to the default factory
 * @param params the training parameters forwarded to the delegate constructor
 *
 * @deprecated Use
 * {@link #SDCrossValidator(String, TrainingParameters,
 * SentenceDetectorFactory, SentenceDetectorEvaluationMonitor...)}
 * and pass in a {@link SentenceDetectorFactory}.
 */
@Deprecated
public SDCrossValidator(String languageCode, TrainingParameters params) {
  this(languageCode, params, new SentenceDetectorFactory(languageCode, true, null, null));
}
/**
 * Creates a sentence model by wrapping the loose configuration values into a
 * new {@link SentenceDetectorFactory} and delegating to the factory-based
 * constructor.
 *
 * TODO: was added in 1.5.3 -> remove
 *
 * @param languageCode the language code, also handed to the created factory
 * @param sentModel the maxent model forwarded to the delegate constructor
 * @param useTokenEnd forwarded to the created factory
 * @param abbreviations the abbreviation dictionary forwarded to the created factory
 * @param eosCharacters the end-of-sentence characters forwarded to the created factory
 * @param manifestInfoEntries the manifest entries forwarded to the delegate constructor
 *
 * @deprecated Use
 * {@link #SentenceModel(String, MaxentModel, Map, SentenceDetectorFactory)}
 * instead and pass in a {@link SentenceDetectorFactory}
 */
@Deprecated
public SentenceModel(String languageCode, MaxentModel sentModel,
    boolean useTokenEnd, Dictionary abbreviations, char[] eosCharacters,
    Map<String, String> manifestInfoEntries) {
  this(languageCode, sentModel, manifestInfoEntries,
      new SentenceDetectorFactory(languageCode, useTokenEnd, abbreviations, eosCharacters));
}
/**
 * Runs a 10-fold cross validation of the sentence detector over the
 * {@code FLORESTA_VIRGEM} line sample and asserts the resulting F-measure.
 *
 * @param params the training parameters used for every fold
 * @param expectedScore the F-measure the evaluation must yield (tolerance 0.0001)
 * @throws IOException as declared by the sample stream and evaluation calls
 */
private void sentenceCrossEval(TrainingParameters params, double expectedScore)
    throws IOException {
  ADSentenceSampleStream samples =
      new ADSentenceSampleStream(getLineSample(FLORESTA_VIRGEM), false);

  // Use the end-of-sentence characters that Factory reports for LANG.
  char[] eosCharacters = new Factory().getEOSCharacters(LANG);
  SentenceDetectorFactory sdFactory =
      new SentenceDetectorFactory(LANG, true, null, eosCharacters);
  SDCrossValidator crossValidator = new SDCrossValidator(LANG, params, sdFactory);

  crossValidator.evaluate(samples, 10);

  System.out.println(crossValidator.getFMeasure());
  Assert.assertEquals(expectedScore,
      crossValidator.getFMeasure().getFMeasure(), 0.0001d);
}
/**
 * Training on the {@code SentencesInsufficient.txt} resource is expected to
 * fail with an {@link InsufficientTrainingDataException}.
 */
@Test(expected = InsufficientTrainingDataException.class)
public void testInsufficientData() throws IOException {
  InputStreamFactory in = new ResourceAsStreamFactory(getClass(),
      "/opennlp/tools/sentdetect/SentencesInsufficient.txt");

  TrainingParameters mlParams = new TrainingParameters();
  mlParams.put(TrainingParameters.ITERATIONS_PARAM, 100);
  mlParams.put(TrainingParameters.CUTOFF_PARAM, 0);

  SentenceDetectorFactory factory =
      new SentenceDetectorFactory("eng", true, null, null);

  // Build the sample pipeline step by step: lines -> sentence samples.
  PlainTextByLineStream lines =
      new PlainTextByLineStream(in, StandardCharsets.UTF_8);
  SentenceSampleStream samples = new SentenceSampleStream(lines);

  SentenceDetectorME.train("eng", samples, factory, mlParams);
}
/**
 * Trains with an abbreviation dictionary and custom end-of-sentence
 * characters, then checks the factory's context generator, scanner and EOS
 * characters both on the trained model and on a copy restored from its
 * serialized form.
 */
@Test
public void testDefault() throws IOException {
  Dictionary abbreviations = loadAbbDictionary();
  char[] expectedEos = {'.', '?'};

  SentenceModel trained =
      train(new SentenceDetectorFactory("eng", true, abbreviations, expectedEos));

  // Factory as exposed by the freshly trained model.
  SentenceDetectorFactory trainedFactory = trained.getFactory();
  Assert.assertTrue(
      trainedFactory.getSDContextGenerator() instanceof DefaultSDContextGenerator);
  Assert.assertTrue(
      trainedFactory.getEndOfSentenceScanner() instanceof DefaultEndOfSentenceScanner);
  Assert.assertTrue(Arrays.equals(expectedEos, trainedFactory.getEOSCharacters()));

  // Round trip: serialize to memory and load the model back.
  ByteArrayOutputStream buffer = new ByteArrayOutputStream();
  trained.serialize(buffer);
  byte[] serialized = buffer.toByteArray();
  SentenceModel restored = new SentenceModel(new ByteArrayInputStream(serialized));

  // Factory as exposed by the restored model.
  SentenceDetectorFactory restoredFactory = restored.getFactory();
  Assert.assertTrue(
      restoredFactory.getSDContextGenerator() instanceof DefaultSDContextGenerator);
  Assert.assertTrue(
      restoredFactory.getEndOfSentenceScanner() instanceof DefaultEndOfSentenceScanner);
  Assert.assertTrue(Arrays.equals(expectedEos, restoredFactory.getEOSCharacters()));
}
/**
 * Trains without an abbreviation dictionary but with custom end-of-sentence
 * characters, then checks that the dictionary stays {@code null} and that the
 * context generator, scanner and EOS characters are as expected, both on the
 * trained model and on a copy restored from its serialized form.
 */
@Test
public void testNullDict() throws IOException {
  Dictionary noAbbreviations = null;
  char[] expectedEos = {'.', '?'};

  SentenceModel trained =
      train(new SentenceDetectorFactory("eng", true, noAbbreviations, expectedEos));

  // Factory as exposed by the freshly trained model.
  SentenceDetectorFactory trainedFactory = trained.getFactory();
  Assert.assertNull(trainedFactory.getAbbreviationDictionary());
  Assert.assertTrue(
      trainedFactory.getSDContextGenerator() instanceof DefaultSDContextGenerator);
  Assert.assertTrue(
      trainedFactory.getEndOfSentenceScanner() instanceof DefaultEndOfSentenceScanner);
  Assert.assertTrue(Arrays.equals(expectedEos, trainedFactory.getEOSCharacters()));

  // Round trip: serialize to memory and load the model back.
  ByteArrayOutputStream buffer = new ByteArrayOutputStream();
  trained.serialize(buffer);
  byte[] serialized = buffer.toByteArray();
  SentenceModel restored = new SentenceModel(new ByteArrayInputStream(serialized));

  // Factory as exposed by the restored model.
  SentenceDetectorFactory restoredFactory = restored.getFactory();
  Assert.assertNull(restoredFactory.getAbbreviationDictionary());
  Assert.assertTrue(
      restoredFactory.getSDContextGenerator() instanceof DefaultSDContextGenerator);
  Assert.assertTrue(
      restoredFactory.getEndOfSentenceScanner() instanceof DefaultEndOfSentenceScanner);
  Assert.assertTrue(Arrays.equals(expectedEos, restoredFactory.getEOSCharacters()));
}
/**
 * Trains with neither an abbreviation dictionary nor end-of-sentence
 * characters, then checks that the factory reports
 * {@code Factory.defaultEosCharacters}, a {@code null} dictionary and the
 * default context generator and scanner, both on the trained model and on a
 * copy restored from its serialized form.
 */
@Test
public void testDefaultEOS() throws IOException {
  Dictionary noAbbreviations = null;
  char[] noEos = null;

  SentenceModel trained =
      train(new SentenceDetectorFactory("eng", true, noAbbreviations, noEos));

  // Factory as exposed by the freshly trained model.
  SentenceDetectorFactory trainedFactory = trained.getFactory();
  Assert.assertNull(trainedFactory.getAbbreviationDictionary());
  Assert.assertTrue(
      trainedFactory.getSDContextGenerator() instanceof DefaultSDContextGenerator);
  Assert.assertTrue(
      trainedFactory.getEndOfSentenceScanner() instanceof DefaultEndOfSentenceScanner);
  Assert.assertTrue(
      Arrays.equals(Factory.defaultEosCharacters, trainedFactory.getEOSCharacters()));

  // Round trip: serialize to memory and load the model back.
  ByteArrayOutputStream buffer = new ByteArrayOutputStream();
  trained.serialize(buffer);
  byte[] serialized = buffer.toByteArray();
  SentenceModel restored = new SentenceModel(new ByteArrayInputStream(serialized));

  // Factory as exposed by the restored model.
  SentenceDetectorFactory restoredFactory = restored.getFactory();
  Assert.assertNull(restoredFactory.getAbbreviationDictionary());
  Assert.assertTrue(
      restoredFactory.getSDContextGenerator() instanceof DefaultSDContextGenerator);
  Assert.assertTrue(
      restoredFactory.getEndOfSentenceScanner() instanceof DefaultEndOfSentenceScanner);
  Assert.assertTrue(
      Arrays.equals(Factory.defaultEosCharacters, restoredFactory.getEOSCharacters()));
}
/**
 * Trains a sentence model using a {@link SentenceDetectorFactory} assembled
 * from the given language code, {@code useTokenEnd} flag and abbreviation
 * dictionary. The factory's end-of-sentence characters are passed as
 * {@code null}.
 *
 * @param languageCode the language code, also handed to the created factory
 * @param samples the training samples forwarded to the delegate
 * @param useTokenEnd forwarded to the created factory
 * @param abbreviations the abbreviation dictionary forwarded to the created factory
 * @param mlParams the training parameters forwarded to the delegate
 * @return the model returned by the factory-based {@code train} overload
 * @throws IOException as declared by the overload this method delegates to
 *
 * @deprecated Use
 * {@link #train(String, ObjectStream, SentenceDetectorFactory, TrainingParameters)}
 * and pass in a {@link SentenceDetectorFactory}.
 */
@Deprecated
public static SentenceModel train(String languageCode,
    ObjectStream<SentenceSample> samples, boolean useTokenEnd,
    Dictionary abbreviations, TrainingParameters mlParams) throws IOException {
  SentenceDetectorFactory sdFactory = new SentenceDetectorFactory(
      languageCode, useTokenEnd, abbreviations, null);
  return train(languageCode, samples, sdFactory, mlParams);
}
/**
 * Trains a sentence model using a {@link SentenceDetectorFactory} assembled
 * from the given language code, {@code useTokenEnd} flag and abbreviation
 * dictionary. The factory's end-of-sentence characters are passed as
 * {@code null}.
 *
 * @param languageCode the language code, also handed to the created factory
 * @param samples the training samples forwarded to the delegate
 * @param useTokenEnd forwarded to the created factory
 * @param abbreviations the abbreviation dictionary forwarded to the created factory
 * @param mlParams the training parameters forwarded to the delegate
 * @return the model returned by the factory-based {@code train} overload
 * @throws IOException as declared by the overload this method delegates to
 *
 * @deprecated Use
 * {@link #train(String, ObjectStream, SentenceDetectorFactory, TrainingParameters)}
 * and pass in a {@link SentenceDetectorFactory}.
 */
@Deprecated
public static SentenceModel train(String languageCode,
    ObjectStream<SentenceSample> samples, boolean useTokenEnd,
    Dictionary abbreviations, TrainingParameters mlParams) throws IOException {
  SentenceDetectorFactory sdFactory = new SentenceDetectorFactory(
      languageCode, useTokenEnd, abbreviations, null);
  return train(languageCode, samples, sdFactory, mlParams);
}
public static SentenceDetectorFactory create(String subclassName, String languageCode, boolean useTokenEnd, Dictionary abbreviationDictionary, char[] eosCharacters) throws InvalidFormatException { if (subclassName == null) { // will create the default factory return new SentenceDetectorFactory(languageCode, useTokenEnd, abbreviationDictionary, eosCharacters); } try { SentenceDetectorFactory theFactory = ExtensionLoader .instantiateExtension(SentenceDetectorFactory.class, subclassName); theFactory.init(languageCode, useTokenEnd, abbreviationDictionary, eosCharacters); return theFactory; } catch (Exception e) { String msg = "Could not instantiate the " + subclassName + ". The initialization throw an exception."; System.err.println(msg); e.printStackTrace(); throw new InvalidFormatException(msg, e); } }
mlParams.put(TrainingParameters.CUTOFF_PARAM, 0); SentenceDetectorFactory factory = new SentenceDetectorFactory("eng", true, null, null);