/** * @deprecated Use a {@link SentenceDetectorFactory} to extend * SentenceDetector functionality. */ public SentenceDetectorME(SentenceModel model, Factory factory) { this.model = model.getMaxentModel(); // if the model has custom EOS characters set, use this to get the context // generator and the EOS scanner; otherwise use language-specific defaults char[] customEOSCharacters = model.getEosCharacters(); if (customEOSCharacters == null) { cgen = factory.createSentenceContextGenerator(model.getLanguage(), getAbbreviations(model.getAbbreviations())); scanner = factory.createEndOfSentenceScanner(model.getLanguage()); } else { cgen = factory.createSentenceContextGenerator( getAbbreviations(model.getAbbreviations()), customEOSCharacters); scanner = factory.createEndOfSentenceScanner(customEOSCharacters); } useTokenEnd = model.useTokenEnd(); }
/**
 * Trains a model with a {@link DummySentenceDetectorFactory} and checks that the dummy
 * components and the EOS characters are exposed by the model's factory, both directly
 * after training and after a serialize/deserialize round trip.
 */
@Test
public void testDummyFactory() throws IOException {
  Dictionary abbreviations = loadAbbDictionary();
  char[] eos = {'.', '?'};

  SentenceModel sdModel =
      train(new DummySentenceDetectorFactory("eng", true, abbreviations, eos));
  assertDummyComponents(sdModel.getFactory(), eos);

  // Round-trip the trained model through an in-memory byte buffer.
  ByteArrayOutputStream out = new ByteArrayOutputStream();
  sdModel.serialize(out);
  byte[] serialized = out.toByteArray();
  SentenceModel fromSerialized = new SentenceModel(new ByteArrayInputStream(serialized));

  SentenceDetectorFactory restoredFactory = fromSerialized.getFactory();
  assertDummyComponents(restoredFactory, eos);

  // The restored factory must agree with what the originally trained model reports.
  Assert.assertEquals(restoredFactory.getAbbreviationDictionary(), sdModel.getAbbreviations());
  Assert.assertTrue(
      Arrays.equals(restoredFactory.getEOSCharacters(), sdModel.getEosCharacters()));
}

/** Asserts that the factory yields the dummy components and the expected EOS characters. */
private void assertDummyComponents(SentenceDetectorFactory factory, char[] expectedEos) {
  Assert.assertTrue(factory.getAbbreviationDictionary() instanceof DummyDictionary);
  Assert.assertTrue(factory.getSDContextGenerator() instanceof DummySDContextGenerator);
  Assert.assertTrue(factory.getEndOfSentenceScanner() instanceof DummyEOSScanner);
  Assert.assertTrue(Arrays.equals(expectedEos, factory.getEOSCharacters()));
}
/** * @deprecated Use a {@link SentenceDetectorFactory} to extend * SentenceDetector functionality. */ public SentenceDetectorME(SentenceModel model, Factory factory) { this.model = model.getMaxentModel(); // if the model has custom EOS characters set, use this to get the context // generator and the EOS scanner; otherwise use language-specific defaults char[] customEOSCharacters = model.getEosCharacters(); if (customEOSCharacters == null) { cgen = factory.createSentenceContextGenerator(model.getLanguage(), getAbbreviations(model.getAbbreviations())); scanner = factory.createEndOfSentenceScanner(model.getLanguage()); } else { cgen = factory.createSentenceContextGenerator( getAbbreviations(model.getAbbreviations()), customEOSCharacters); scanner = factory.createEndOfSentenceScanner(customEOSCharacters); } useTokenEnd = model.useTokenEnd(); }
/** * @deprecated Use a {@link SentenceDetectorFactory} to extend * SentenceDetector functionality. */ public SentenceDetectorME(SentenceModel model, Factory factory) { this.model = model.getMaxentModel(); // if the model has custom EOS characters set, use this to get the context // generator and the EOS scanner; otherwise use language-specific defaults char[] customEOSCharacters = model.getEosCharacters(); if (customEOSCharacters == null) { cgen = factory.createSentenceContextGenerator(model.getLanguage(), getAbbreviations(model.getAbbreviations())); scanner = factory.createEndOfSentenceScanner(model.getLanguage()); } else { cgen = factory.createSentenceContextGenerator( getAbbreviations(model.getAbbreviations()), customEOSCharacters); scanner = factory.createEndOfSentenceScanner(customEOSCharacters); } useTokenEnd = model.useTokenEnd(); }