/**
 * Serializes the shared model, reads it back, and checks that the reloaded
 * model reports a {@code DummyFactory} as its factory.
 */
@Test
public void testCorrectFactory() throws IOException {
  byte[] modelBytes = LanguageDetectorMETest.serializeModel(model);
  ByteArrayInputStream modelIn = new ByteArrayInputStream(modelBytes);

  LanguageDetectorModel reloaded = new LanguageDetectorModel(modelIn);

  Assert.assertTrue(reloaded.getFactory() instanceof DummyFactory);
}
/**
 * Creates a language detector model around an already trained maxent model.
 *
 * @param langdetectModel the maxent model stored in the artifact map under
 *     {@code LANGDETECT_MODEL_ENTRY_NAME}
 * @param manifestInfoEntries manifest entries forwarded to the superclass constructor
 * @param factory the language detector factory forwarded to the superclass constructor
 */
public LanguageDetectorModel(MaxentModel langdetectModel,
    Map<String, String> manifestInfoEntries, LanguageDetectorFactory factory) {
  super(COMPONENT_NAME, "und", manifestInfoEntries, factory);

  // Register the model artifact before running the artifact map check.
  this.artifactMap.put(LANGDETECT_MODEL_ENTRY_NAME, langdetectModel);
  this.checkArtifactMap();
}
/**
 * Lists every outcome of the underlying maxent model.
 *
 * @return an array holding the outcome at each index from 0 up to the
 *     model's number of outcomes
 */
@Override
public String[] getSupportedLanguages() {
  String[] supported = new String[model.getMaxentModel().getNumOutcomes()];

  for (int outcome = 0; outcome < supported.length; outcome++) {
    supported[outcome] = model.getMaxentModel().getOutcome(outcome);
  }

  return supported;
}
/**
 * Builds a {@link LanguageDetectorModel} from the supplied stream.
 *
 * @param modelIn the stream handed to the {@code LanguageDetectorModel} constructor
 * @return the newly constructed model
 * @throws IOException if the model constructor fails with an I/O error
 */
@Override
protected LanguageDetectorModel loadModel(InputStream modelIn) throws IOException {
  LanguageDetectorModel loadedModel = new LanguageDetectorModel(modelIn);
  return loadedModel;
}
/**
 * Initializes the current instance with a language detector model. The
 * context generator is obtained from the factory of the given model.
 *
 * @param model the language detector model
 */
public LanguageDetectorME(LanguageDetectorModel model) {
  this.model = model;
  // Same object as the parameter; the generator comes from the model's factory.
  this.mContextGenerator = this.model.getFactory().getContextGenerator();
}
/**
 * Serializes the given model into an in-memory buffer.
 *
 * @param model the model to serialize
 * @return the bytes written by {@code model.serialize}
 * @throws IOException if serialization fails
 */
protected static byte[] serializeModel(LanguageDetectorModel model) throws IOException {
  ByteArrayOutputStream buffer = new ByteArrayOutputStream();
  model.serialize(buffer);

  byte[] serializedBytes = buffer.toByteArray();
  return serializedBytes;
}
/**
 * Trains a language detector model from a stream of language samples.
 *
 * <p>Calls {@code putIfAbsent} on the supplied {@code mlParams} to request the
 * one-pass data indexer, so the caller's parameter object may be modified.
 *
 * @param samples the training samples
 * @param mlParams the training parameters used to obtain the event trainer
 * @param factory the factory supplying the context generator; also stored in the
 *     returned model
 * @return a model wrapping the trained maxent model, the manifest entries
 *     and the factory
 * @throws IOException if training fails with an I/O error
 */
public static LanguageDetectorModel train(ObjectStream<LanguageSample> samples,
    TrainingParameters mlParams, LanguageDetectorFactory factory) throws IOException {

  mlParams.putIfAbsent(AbstractEventTrainer.DATA_INDEXER_PARAM,
      AbstractEventTrainer.DATA_INDEXER_ONE_PASS_VALUE);

  Map<String, String> manifest = new HashMap<>();
  EventTrainer eventTrainer = TrainerFactory.getEventTrainer(mlParams, manifest);

  // Events are generated with the context generator provided by the factory.
  LanguageDetectorEventStream events =
      new LanguageDetectorEventStream(samples, factory.getContextGenerator());
  MaxentModel trained = eventTrainer.train(events);

  return new LanguageDetectorModel(trained, manifest, factory);
}
}
@Test public void testDummyFactoryContextGenerator() throws Exception { LanguageDetectorContextGenerator cg = model.getFactory().getContextGenerator(); String[] context = cg.getContext( "a dummy text phrase to test if the context generator works!!!!!!!!!!!!"); Set<String> set = new HashSet(Arrays.asList(context)); Assert.assertTrue(set.contains("!!!!!")); // default normalizer would remove the repeated ! Assert.assertTrue(set.contains("a dum")); Assert.assertTrue(set.contains("tg=[THE,CONTEXT,GENERATOR]")); }
/**
 * Round-trips the shared model through serialization and verifies that the
 * factory of the reloaded model is a {@code DummyFactory}.
 */
@Test
public void testDummyFactory() throws Exception {
  byte[] modelBytes = LanguageDetectorMETest.serializeModel(model);
  ByteArrayInputStream modelIn = new ByteArrayInputStream(modelBytes);

  LanguageDetectorModel reloaded = new LanguageDetectorModel(modelIn);

  Assert.assertTrue(reloaded.getFactory() instanceof DummyFactory);
}
/**
 * Serializes the shared model and checks that a model can be constructed
 * again from the resulting bytes.
 */
@Test
public void testLoadFromSerialized() throws IOException {
  byte[] modelBytes = serializeModel(model);
  ByteArrayInputStream modelIn = new ByteArrayInputStream(modelBytes);

  LanguageDetectorModel reloaded = new LanguageDetectorModel(modelIn);

  Assert.assertNotNull(reloaded);
}
/**
 * Initializes the current instance with a language detector model. The
 * context generator is obtained from the factory of the given model.
 *
 * @param model the language detector model
 */
public LanguageDetectorME(LanguageDetectorModel model) {
  this.model = model;
  // Same object as the parameter; the generator comes from the model's factory.
  this.mContextGenerator = this.model.getFactory().getContextGenerator();
}
/**
 * Evaluates the maxent model on the context generated for the given text and
 * pairs every model outcome with its score.
 *
 * @param content the text to classify
 * @return one {@code Language} per model outcome, sorted by descending confidence
 */
@Override
public Language[] predictLanguages(CharSequence content) {
  double[] scores =
      model.getMaxentModel().eval(mContextGenerator.getContext(content.toString()));

  Language[] predictions = new Language[scores.length];
  int outcome = 0;
  for (double score : scores) {
    predictions[outcome] = new Language(model.getMaxentModel().getOutcome(outcome), score);
    outcome++;
  }

  // Arguments are swapped in the comparison so the highest confidence sorts first.
  Arrays.sort(predictions,
      (left, right) -> Double.compare(right.getConfidence(), left.getConfidence()));

  return predictions;
}
/**
 * Creates a language detector model around an already trained maxent model.
 *
 * @param langdetectModel the maxent model stored in the artifact map under
 *     {@code LANGDETECT_MODEL_ENTRY_NAME}
 * @param manifestInfoEntries manifest entries forwarded to the superclass constructor
 * @param factory the language detector factory forwarded to the superclass constructor
 */
public LanguageDetectorModel(MaxentModel langdetectModel,
    Map<String, String> manifestInfoEntries, LanguageDetectorFactory factory) {
  super(COMPONENT_NAME, "und", manifestInfoEntries, factory);

  // Register the model artifact before running the artifact map check.
  this.artifactMap.put(LANGDETECT_MODEL_ENTRY_NAME, langdetectModel);
  this.checkArtifactMap();
}
/**
 * Builds a {@link LanguageDetectorModel} from the supplied stream.
 *
 * @param modelIn the stream handed to the {@code LanguageDetectorModel} constructor
 * @return the newly constructed model
 * @throws IOException if the model constructor fails with an I/O error
 */
@Override
protected LanguageDetectorModel loadModel(InputStream modelIn) throws IOException {
  LanguageDetectorModel loadedModel = new LanguageDetectorModel(modelIn);
  return loadedModel;
}
/**
 * Initializes the current instance with a language detector model. The
 * context generator is obtained from the factory of the given model.
 *
 * @param model the language detector model
 */
public LanguageDetectorME(LanguageDetectorModel model) {
  this.model = model;
  // Same object as the parameter; the generator comes from the model's factory.
  this.mContextGenerator = this.model.getFactory().getContextGenerator();
}
/**
 * Lists every outcome of the underlying maxent model.
 *
 * @return an array holding the outcome at each index from 0 up to the
 *     model's number of outcomes
 */
@Override
public String[] getSupportedLanguages() {
  String[] supported = new String[model.getMaxentModel().getNumOutcomes()];

  for (int outcome = 0; outcome < supported.length; outcome++) {
    supported[outcome] = model.getMaxentModel().getOutcome(outcome);
  }

  return supported;
}
/**
 * Creates a language detector model around an already trained maxent model.
 *
 * @param langdetectModel the maxent model stored in the artifact map under
 *     {@code LANGDETECT_MODEL_ENTRY_NAME}
 * @param manifestInfoEntries manifest entries forwarded to the superclass constructor
 * @param factory the language detector factory forwarded to the superclass constructor
 */
public LanguageDetectorModel(MaxentModel langdetectModel,
    Map<String, String> manifestInfoEntries, LanguageDetectorFactory factory) {
  super(COMPONENT_NAME, "und", manifestInfoEntries, factory);

  // Register the model artifact before running the artifact map check.
  this.artifactMap.put(LANGDETECT_MODEL_ENTRY_NAME, langdetectModel);
  this.checkArtifactMap();
}
/**
 * Builds a {@link LanguageDetectorModel} from the supplied stream.
 *
 * @param modelIn the stream handed to the {@code LanguageDetectorModel} constructor
 * @return the newly constructed model
 * @throws IOException if the model constructor fails with an I/O error
 */
@Override
protected LanguageDetectorModel loadModel(InputStream modelIn) throws IOException {
  LanguageDetectorModel loadedModel = new LanguageDetectorModel(modelIn);
  return loadedModel;
}
/**
 * Lists every outcome of the underlying maxent model.
 *
 * @return an array holding the outcome at each index from 0 up to the
 *     model's number of outcomes
 */
@Override
public String[] getSupportedLanguages() {
  String[] supported = new String[model.getMaxentModel().getNumOutcomes()];

  for (int outcome = 0; outcome < supported.length; outcome++) {
    supported[outcome] = model.getMaxentModel().getOutcome(outcome);
  }

  return supported;
}
/**
 * Trains a language detector model from a stream of language samples.
 *
 * <p>Calls {@code putIfAbsent} on the supplied {@code mlParams} to request the
 * one-pass data indexer, so the caller's parameter object may be modified.
 *
 * @param samples the training samples
 * @param mlParams the training parameters used to obtain the event trainer
 * @param factory the factory supplying the context generator; also stored in the
 *     returned model
 * @return a model wrapping the trained maxent model, the manifest entries
 *     and the factory
 * @throws IOException if training fails with an I/O error
 */
public static LanguageDetectorModel train(ObjectStream<LanguageSample> samples,
    TrainingParameters mlParams, LanguageDetectorFactory factory) throws IOException {

  mlParams.putIfAbsent(AbstractEventTrainer.DATA_INDEXER_PARAM,
      AbstractEventTrainer.DATA_INDEXER_ONE_PASS_VALUE);

  Map<String, String> manifest = new HashMap<>();
  EventTrainer eventTrainer = TrainerFactory.getEventTrainer(mlParams, manifest);

  // Events are generated with the context generator provided by the factory.
  LanguageDetectorEventStream events =
      new LanguageDetectorEventStream(samples, factory.getContextGenerator());
  MaxentModel trained = eventTrainer.train(events);

  return new LanguageDetectorModel(trained, manifest, factory);
}
}