/**
 * Renders the configured feature generators as a single comma-separated string.
 *
 * <p>The canonical class name of each element returned by
 * {@code getFeatureGenerators()} is written in array order, joined by a bare
 * {@code ','} with no surrounding whitespace. An empty array yields an empty string.
 *
 * <p>Note: {@link Class#getCanonicalName()} returns {@code null} for anonymous and
 * local classes; such an entry is appended as the literal text {@code "null"}.
 *
 * @return the comma-separated canonical class names, never {@code null}
 */
private String featureGeneratorsAsString() {
  FeatureGenerator[] generators = getFeatureGenerators();
  StringBuilder names = new StringBuilder();
  for (int i = 0; i < generators.length; i++) {
    // The separator goes in front of every element except the first.
    if (i > 0) {
      names.append(',');
    }
    names.append(generators[i].getClass().getCanonicalName());
  }
  return names.toString();
}
/**
 * Extends the manifest entries produced by the superclass with the feature
 * generator list.
 *
 * <p>When {@code getFeatureGenerators()} returns a non-null value, the result of
 * {@code featureGeneratorsAsString()} is stored under the {@code FEATURE_GENERATORS}
 * key; otherwise the superclass entries are returned untouched.
 *
 * @return the map obtained from {@code super.createManifestEntries()}, possibly with
 *         the feature generator entry added
 */
@Override
public Map<String, String> createManifestEntries() {
  final Map<String, String> entries = super.createManifestEntries();

  // Nothing to record when no feature generators are configured.
  if (getFeatureGenerators() == null) {
    return entries;
  }

  entries.put(FEATURE_GENERATORS, featureGeneratorsAsString());
  return entries;
}
/**
 * Creates a categorizer backed by the given doccat model.
 *
 * <p>The context generator is built from the feature generators exposed by the
 * model's factory ({@code model.getFactory().getFeatureGenerators()}).
 *
 * @param model the doccat model; it is stored in this instance and its factory
 *              supplies the feature generators
 */
public DocumentCategorizerME(DoccatModel model) {
  this.model = model;
  // The field was just assigned from the parameter, so both refer to the same model.
  this.mContextGenerator =
      new DocumentCategorizerContextGenerator(model.getFactory().getFeatureGenerators());
}
/**
 * Trains a document categorization model from the given samples.
 *
 * <p>An event trainer is obtained from {@code TrainerFactory} for the supplied
 * training parameters, the samples are wrapped in a
 * {@code DocumentCategorizerEventStream} that uses the factory's feature generators,
 * and the trained model is packaged into a {@code DoccatModel}.
 *
 * @param languageCode the language code passed on to the resulting model
 * @param samples      the stream of training samples
 * @param mlParams     the training parameters handed to the trainer factory
 * @param factory      provides the feature generators for event creation and is
 *                     passed on to the resulting model
 * @return the newly trained model
 * @throws IOException declared by this method; propagated from the underlying calls
 */
public static DoccatModel train(String languageCode, ObjectStream<DocumentSample> samples,
    TrainingParameters mlParams, DoccatFactory factory) throws IOException {

  // The same map instance is given to the trainer factory and later to the model.
  Map<String, String> manifestEntries = new HashMap<>();
  EventTrainer eventTrainer = TrainerFactory.getEventTrainer(mlParams, manifestEntries);

  DocumentCategorizerEventStream events =
      new DocumentCategorizerEventStream(samples, factory.getFeatureGenerators());
  MaxentModel trainedModel = eventTrainer.train(events);

  return new DoccatModel(languageCode, trainedModel, manifestEntries, factory);
}
}
/**
 * Round-trips a model trained via the no-argument {@code train()} helper through
 * serialization and checks that the restored factory exposes exactly one feature
 * generator, of type {@code BagOfWordsFeatureGenerator}.
 */
@Test
public void testDefault() throws IOException {
  DoccatModel trained = train();
  Assert.assertNotNull(trained);

  // Serialize to memory and read the model back from the same bytes.
  ByteArrayOutputStream sink = new ByteArrayOutputStream();
  trained.serialize(sink);
  byte[] serialized = sink.toByteArray();
  DoccatModel restored = new DoccatModel(new ByteArrayInputStream(serialized));

  DoccatFactory restoredFactory = restored.getFactory();
  Assert.assertNotNull(restoredFactory);

  Assert.assertEquals(1, restoredFactory.getFeatureGenerators().length);
  Assert.assertEquals(BagOfWordsFeatureGenerator.class,
      restoredFactory.getFeatureGenerators()[0].getClass());
}
/**
 * Round-trips a model trained with a factory holding three custom feature
 * generators and checks that the restored factory reports three generators whose
 * classes match the configured ones, in the same order.
 */
@Test
public void testCustom() throws IOException {
  FeatureGenerator[] configured = new FeatureGenerator[] {
      new BagOfWordsFeatureGenerator(),
      new NGramFeatureGenerator(),
      new NGramFeatureGenerator(2,3)
  };

  DoccatModel trained = train(new DoccatFactory(configured));
  Assert.assertNotNull(trained);

  // Serialize to memory and read the model back from the same bytes.
  ByteArrayOutputStream sink = new ByteArrayOutputStream();
  trained.serialize(sink);
  byte[] serialized = sink.toByteArray();
  DoccatModel restored = new DoccatModel(new ByteArrayInputStream(serialized));

  DoccatFactory restoredFactory = restored.getFactory();
  Assert.assertNotNull(restoredFactory);
  Assert.assertEquals(3, restoredFactory.getFeatureGenerators().length);

  // Expected generator classes, position by position.
  Class<?>[] expectedTypes = {
      BagOfWordsFeatureGenerator.class,
      NGramFeatureGenerator.class,
      NGramFeatureGenerator.class
  };
  for (int i = 0; i < expectedTypes.length; i++) {
    Assert.assertEquals(expectedTypes[i],
        restoredFactory.getFeatureGenerators()[i].getClass());
  }
}
/**
 * Renders the configured feature generators as a single comma-separated string.
 *
 * <p>The canonical class name of each element returned by
 * {@code getFeatureGenerators()} is written in array order, joined by a bare
 * {@code ','} with no surrounding whitespace. An empty array yields an empty string.
 *
 * <p>Note: {@link Class#getCanonicalName()} returns {@code null} for anonymous and
 * local classes; such an entry is appended as the literal text {@code "null"}.
 *
 * @return the comma-separated canonical class names, never {@code null}
 */
private String featureGeneratorsAsString() {
  FeatureGenerator[] generators = getFeatureGenerators();
  StringBuilder names = new StringBuilder();
  for (int i = 0; i < generators.length; i++) {
    // The separator goes in front of every element except the first.
    if (i > 0) {
      names.append(',');
    }
    names.append(generators[i].getClass().getCanonicalName());
  }
  return names.toString();
}
/**
 * Renders the configured feature generators as a single comma-separated string.
 *
 * <p>The canonical class name of each element returned by
 * {@code getFeatureGenerators()} is written in array order, joined by a bare
 * {@code ','} with no surrounding whitespace. An empty array yields an empty string.
 *
 * <p>Note: {@link Class#getCanonicalName()} returns {@code null} for anonymous and
 * local classes; such an entry is appended as the literal text {@code "null"}.
 *
 * @return the comma-separated canonical class names, never {@code null}
 */
private String featureGeneratorsAsString() {
  FeatureGenerator[] generators = getFeatureGenerators();
  StringBuilder names = new StringBuilder();
  for (int i = 0; i < generators.length; i++) {
    // The separator goes in front of every element except the first.
    if (i > 0) {
      names.append(',');
    }
    names.append(generators[i].getClass().getCanonicalName());
  }
  return names.toString();
}
/**
 * Extends the manifest entries produced by the superclass with the feature
 * generator list.
 *
 * <p>When {@code getFeatureGenerators()} returns a non-null value, the result of
 * {@code featureGeneratorsAsString()} is stored under the {@code FEATURE_GENERATORS}
 * key; otherwise the superclass entries are returned untouched.
 *
 * @return the map obtained from {@code super.createManifestEntries()}, possibly with
 *         the feature generator entry added
 */
@Override
public Map<String, String> createManifestEntries() {
  final Map<String, String> entries = super.createManifestEntries();

  // Nothing to record when no feature generators are configured.
  if (getFeatureGenerators() == null) {
    return entries;
  }

  entries.put(FEATURE_GENERATORS, featureGeneratorsAsString());
  return entries;
}
/**
 * Extends the manifest entries produced by the superclass with the feature
 * generator list.
 *
 * <p>When {@code getFeatureGenerators()} returns a non-null value, the result of
 * {@code featureGeneratorsAsString()} is stored under the {@code FEATURE_GENERATORS}
 * key; otherwise the superclass entries are returned untouched.
 *
 * @return the map obtained from {@code super.createManifestEntries()}, possibly with
 *         the feature generator entry added
 */
@Override
public Map<String, String> createManifestEntries() {
  final Map<String, String> entries = super.createManifestEntries();

  // Nothing to record when no feature generators are configured.
  if (getFeatureGenerators() == null) {
    return entries;
  }

  entries.put(FEATURE_GENERATORS, featureGeneratorsAsString());
  return entries;
}
/**
 * Creates a categorizer backed by the given doccat model.
 *
 * <p>The context generator is built from the feature generators exposed by the
 * model's factory ({@code model.getFactory().getFeatureGenerators()}).
 *
 * @param model the doccat model; it is stored in this instance and its factory
 *              supplies the feature generators
 */
public DocumentCategorizerME(DoccatModel model) {
  this.model = model;
  // The field was just assigned from the parameter, so both refer to the same model.
  this.mContextGenerator =
      new DocumentCategorizerContextGenerator(model.getFactory().getFeatureGenerators());
}
/**
 * Creates a categorizer backed by the given doccat model.
 *
 * <p>The context generator is built from the feature generators exposed by the
 * model's factory ({@code model.getFactory().getFeatureGenerators()}).
 *
 * @param model the doccat model; it is stored in this instance and its factory
 *              supplies the feature generators
 */
public DocumentCategorizerME(DoccatModel model) {
  this.model = model;
  // The field was just assigned from the parameter, so both refer to the same model.
  this.mContextGenerator =
      new DocumentCategorizerContextGenerator(model.getFactory().getFeatureGenerators());
}
/**
 * Trains a document categorization model from the given samples.
 *
 * <p>An event trainer is obtained from {@code TrainerFactory} for the supplied
 * training parameters, the samples are wrapped in a
 * {@code DocumentCategorizerEventStream} that uses the factory's feature generators,
 * and the trained model is packaged into a {@code DoccatModel}.
 *
 * @param languageCode the language code passed on to the resulting model
 * @param samples      the stream of training samples
 * @param mlParams     the training parameters handed to the trainer factory
 * @param factory      provides the feature generators for event creation and is
 *                     passed on to the resulting model
 * @return the newly trained model
 * @throws IOException declared by this method; propagated from the underlying calls
 */
public static DoccatModel train(String languageCode, ObjectStream<DocumentSample> samples,
    TrainingParameters mlParams, DoccatFactory factory) throws IOException {

  // The same map instance is given to the trainer factory and later to the model.
  Map<String, String> manifestEntries = new HashMap<>();
  EventTrainer eventTrainer = TrainerFactory.getEventTrainer(mlParams, manifestEntries);

  DocumentCategorizerEventStream events =
      new DocumentCategorizerEventStream(samples, factory.getFeatureGenerators());
  MaxentModel trainedModel = eventTrainer.train(events);

  return new DoccatModel(languageCode, trainedModel, manifestEntries, factory);
}
}
/**
 * Trains a document categorization model from the given samples.
 *
 * <p>An event trainer is obtained from {@code TrainerFactory} for the supplied
 * training parameters, the samples are wrapped in a
 * {@code DocumentCategorizerEventStream} that uses the factory's feature generators,
 * and the trained model is packaged into a {@code DoccatModel}.
 *
 * @param languageCode the language code passed on to the resulting model
 * @param samples      the stream of training samples
 * @param mlParams     the training parameters handed to the trainer factory
 * @param factory      provides the feature generators for event creation and is
 *                     passed on to the resulting model
 * @return the newly trained model
 * @throws IOException declared by this method; propagated from the underlying calls
 */
public static DoccatModel train(String languageCode, ObjectStream<DocumentSample> samples,
    TrainingParameters mlParams, DoccatFactory factory) throws IOException {

  // The same map instance is given to the trainer factory and later to the model.
  Map<String, String> manifestEntries = new HashMap<>();
  EventTrainer eventTrainer = TrainerFactory.getEventTrainer(mlParams, manifestEntries);

  DocumentCategorizerEventStream events =
      new DocumentCategorizerEventStream(samples, factory.getFeatureGenerators());
  MaxentModel trainedModel = eventTrainer.train(events);

  return new DoccatModel(languageCode, trainedModel, manifestEntries, factory);
}
}