static List<Pipe> getPipes() { List<Pipe> pipes = newArrayList(); pipes.add(new Target2Label()); pipes.add(new MyInput2RegexTokens()); // pipes.add(new PrintInputAndTarget()); pipes.add(new TokenSequence2FeatureSequence()); pipes.add(new FeatureSequence2FeatureVector()); return pipes; }
/** * Construct classifier pipe with given labels and stopwords * * @param initial pipe * @param stopwords to be removed */ public AbstractClassifierPipe(Pipe pipe, Collection<String> stopwords) { // @formatter:off super( ImmutableList.of( pipe, new CharSequenceLowercase(), new CharSequence2TokenSequence(Pattern.compile("\\p{L}[\\p{L}\\p{P}]+\\p{L}")), new RemoveStopwords(stopwords), new TokenSequence2FeatureSequence(), new FeatureSequence2FeatureVector())); // @formatter:on } }
/** * Construct classifier pipe with given labels and stopwords * * @param initial pipe * @param stopwords to be removed */ public AbstractClassifierPipe(Pipe pipe, Collection<String> stopwords) { // @formatter:off super( ImmutableList.of( pipe, new CharSequenceLowercase(), new CharSequence2TokenSequence(Pattern.compile("\\p{L}[\\p{L}\\p{P}]+\\p{L}")), new RemoveStopwords(stopwords), new TokenSequence2FeatureSequence(), new FeatureSequence2FeatureVector())); // @formatter:on } }
/**
 * Builds the pipe used by this class.
 *
 * @return a new {@code SerialPipes} chaining, in order, {@code CharSequence2TokenSequence},
 *         {@code TokenSequenceLowercase}, {@code TokenSequence2FeatureSequence} and
 *         {@code FeatureSequence2FeatureVector}
 */
public Pipe createPipe() {
    Pipe[] stages = {
        new CharSequence2TokenSequence(),
        new TokenSequenceLowercase(),
        new TokenSequence2FeatureSequence(),
        new FeatureSequence2FeatureVector()
    };
    return new SerialPipes(stages);
}
/**
 * Runs {@code testRandomTrainedOn} once with a plain {@code InstanceList} and once with a
 * {@code PagedInstanceList}, both built over the same pipe, and asserts that the two
 * returned values agree to within 0.01.
 */
public void testRandomTrained() {
    Pipe[] stages = {
        new TokenSequence2FeatureSequence(),
        new FeatureSequence2FeatureVector(),
        new Target2Label()
    };
    Pipe pipeline = new SerialPipes(stages);

    // Same pipe, two different list implementations.
    double plainListResult = testRandomTrainedOn(new InstanceList(pipeline));
    double pagedListResult =
        testRandomTrainedOn(new PagedInstanceList(pipeline, 700, 200, new File(".")));

    assertEquals(plainListResult, pagedListResult, 0.01);
}
/**
 * Builds the pipe used by this class.
 *
 * @return a new {@code SerialPipes} chaining, in order, {@code CharSequence2TokenSequence},
 *         {@code TokenSequenceLowercase}, {@code TokenSequence2FeatureSequence} and
 *         {@code FeatureSequence2FeatureVector}
 */
public Pipe createPipe() {
    Pipe[] stages = {
        new CharSequence2TokenSequence(),
        new TokenSequenceLowercase(),
        new TokenSequence2FeatureSequence(),
        new FeatureSequence2FeatureVector()
    };
    return new SerialPipes(stages);
}
/**
 * Runs {@code testRandomTrainedOn} once with a plain {@code InstanceList} and once with a
 * {@code PagedInstanceList}, both built over the same pipe, and asserts that the two
 * returned values agree to within 0.01.
 */
public void testRandomTrained() {
    Pipe[] stages = {
        new TokenSequence2FeatureSequence(),
        new FeatureSequence2FeatureVector(),
        new Target2Label()
    };
    Pipe pipeline = new SerialPipes(stages);

    // Same pipe, two different list implementations.
    double plainListResult = testRandomTrainedOn(new InstanceList(pipeline));
    double pagedListResult =
        testRandomTrainedOn(new PagedInstanceList(pipeline, 700, 200, new File(".")));

    assertEquals(plainListResult, pagedListResult, 0.01);
}
/** * Creates a list consisting of randomly-generated * <code>FeatureVector</code>s. */ // xxx Perhaps split these out into a utility class public InstanceList (Randoms r, // the generator of all random-ness used here Dirichlet classCentroidDistribution, // includes a Alphabet double classCentroidAverageAlphaMean, // Gaussian mean on the sum of alphas double classCentroidAverageAlphaVariance, // Gaussian variance on the sum of alphas double featureVectorSizePoissonLambda, double classInstanceCountPoissonLambda, String[] classNames) { this (new SerialPipes (new Pipe[] { new TokenSequence2FeatureSequence (), new FeatureSequence2FeatureVector (), new Target2Label()})); //classCentroidDistribution.print(); Iterator<Instance> iter = new RandomTokenSequenceIterator ( r, classCentroidDistribution, classCentroidAverageAlphaMean, classCentroidAverageAlphaVariance, featureVectorSizePoissonLambda, classInstanceCountPoissonLambda, classNames); this.addThruPipe (iter); }
/** * Creates a list consisting of randomly-generated * <code>FeatureVector</code>s. */ // xxx Perhaps split these out into a utility class public InstanceList (Randoms r, // the generator of all random-ness used here Dirichlet classCentroidDistribution, // includes a Alphabet double classCentroidAverageAlphaMean, // Gaussian mean on the sum of alphas double classCentroidAverageAlphaVariance, // Gaussian variance on the sum of alphas double featureVectorSizePoissonLambda, double classInstanceCountPoissonLambda, String[] classNames) { this (new SerialPipes (new Pipe[] { new TokenSequence2FeatureSequence (), new FeatureSequence2FeatureVector (), new Target2Label()})); //classCentroidDistribution.print(); Iterator<Instance> iter = new RandomTokenSequenceIterator ( r, classCentroidDistribution, classCentroidAverageAlphaMean, classCentroidAverageAlphaVariance, featureVectorSizePoissonLambda, classInstanceCountPoissonLambda, classNames); this.addThruPipe (iter); }
/** * Creates a list consisting of randomly-generated * <code>FeatureVector</code>s. */ // xxx Perhaps split these out into a utility class public InstanceList (Randoms r, // the generator of all random-ness used here Dirichlet classCentroidDistribution, // includes a Alphabet double classCentroidAverageAlphaMean, // Gaussian mean on the sum of alphas double classCentroidAverageAlphaVariance, // Gaussian variance on the sum of alphas double featureVectorSizePoissonLambda, double classInstanceCountPoissonLambda, String[] classNames) { this (new SerialPipes (new Pipe[] { new TokenSequence2FeatureSequence (), new FeatureSequence2FeatureVector (), new Target2Label()})); //classCentroidDistribution.print(); Iterator<Instance> iter = new RandomTokenSequenceIterator ( r, classCentroidDistribution, classCentroidAverageAlphaMean, classCentroidAverageAlphaVariance, featureVectorSizePoissonLambda, classInstanceCountPoissonLambda, classNames); this.addThruPipe (iter); }
/**
 * Builds an {@code InstanceList} over a six-stage {@code SerialPipes} and adds to it,
 * through that pipe, whatever a {@code FileIterator} rooted at {@code foo/bar} yields.
 * The test makes no assertions; it only exercises construction and
 * {@code addThruPipe}.
 */
public void testThree() {
    Pipe[] stages = {
        new Target2Label(),
        new CharSequence2TokenSequence(),
        new TokenSequenceLowercase(),
        new TokenSequenceRemoveStopwords(),
        new TokenSequence2FeatureSequence(),
        new FeatureSequence2FeatureVector()
    };
    InstanceList instances = new InstanceList(new SerialPipes(stages));

    File root = new File("foo/bar");
    // Captures everything before the first '/' of the matched text.
    Pattern leadingComponent = Pattern.compile("^([^/]*)/");
    Iterator<Instance> files = new FileIterator(root, null, leadingComponent);
    instances.addThruPipe(files);
}
/**
 * Builds an {@code InstanceList} over a six-stage {@code SerialPipes} and adds to it,
 * through that pipe, whatever a {@code FileIterator} rooted at {@code foo/bar} yields.
 * The test makes no assertions; it only exercises construction and
 * {@code addThruPipe}.
 */
public void testThree() {
    Pipe[] stages = {
        new Target2Label(),
        new CharSequence2TokenSequence(),
        new TokenSequenceLowercase(),
        new TokenSequenceRemoveStopwords(),
        new TokenSequence2FeatureSequence(),
        new FeatureSequence2FeatureVector()
    };
    InstanceList instances = new InstanceList(new SerialPipes(stages));

    File root = new File("foo/bar");
    // Captures everything before the first '/' of the matched text.
    Pattern leadingComponent = Pattern.compile("^([^/]*)/");
    Iterator<Instance> files = new FileIterator(root, null, leadingComponent);
    instances.addThruPipe(files);
}
new Target2Label(labels), new TokenSequence2FeatureSequence(features), new FeatureSequence2FeatureVector()}); InstanceList trainingList = new InstanceList(instancePipe); trainingList.addThruPipe(data);
new Target2Label(labels), new TokenSequence2FeatureSequence(features), new FeatureSequence2FeatureVector()}); InstanceList trainingList = new InstanceList(instancePipe); trainingList.addThruPipe(data);
new Target2Label(labels), new TokenSequence2FeatureSequence(features), new FeatureSequence2FeatureVector()}); InstanceList trainingList = new InstanceList(instancePipe); trainingList.addThruPipe(data);
/**
 * Trains a {@code NaiveBayesTrainer} classifier on two small sets of sentences, targeted
 * "africa" and "asia", then classifies a new sentence and asserts that its best label is
 * the very same object the target alphabet returns for "africa".
 */
public void testStringTrained() {
    String[] africaTraining = {
        "on the plains of africa the lions roar",
        "in swahili ngoma means to dance",
        "nelson mandela became president of south africa",
        "the saraha dessert is expanding"
    };
    String[] asiaTraining = {
        "panda bears eat bamboo",
        "china's one child policy has resulted in a surplus of boys",
        "tigers live in the jungle"
    };

    Pipe[] stages = {
        new Target2Label(),
        new CharSequence2TokenSequence(),
        new TokenSequence2FeatureSequence(),
        new FeatureSequence2FeatureVector()
    };
    InstanceList instances = new InstanceList(new SerialPipes(stages));
    instances.addThruPipe(new ArrayIterator(africaTraining, "africa"));
    instances.addThruPipe(new ArrayIterator(asiaTraining, "asia"));

    Classifier trained = new NaiveBayesTrainer().train(instances);
    Classification outcome = trained.classify("nelson mandela never eats lions");

    // Identity comparison on purpose: the original test checks with ==, not equals().
    Object predicted = outcome.getLabeling().getBestLabel();
    LabelAlphabet targets = (LabelAlphabet) instances.getTargetAlphabet();
    Object expected = targets.lookupLabel("africa");
    assertTrue(predicted == expected);
}
/**
 * Trains a {@code NaiveBayesTrainer} classifier on two small sets of sentences, targeted
 * "africa" and "asia", then classifies a new sentence and asserts that its best label is
 * the very same object the target alphabet returns for "africa".
 */
public void testStringTrained() {
    String[] africaTraining = {
        "on the plains of africa the lions roar",
        "in swahili ngoma means to dance",
        "nelson mandela became president of south africa",
        "the saraha dessert is expanding"
    };
    String[] asiaTraining = {
        "panda bears eat bamboo",
        "china's one child policy has resulted in a surplus of boys",
        "tigers live in the jungle"
    };

    Pipe[] stages = {
        new Target2Label(),
        new CharSequence2TokenSequence(),
        new TokenSequence2FeatureSequence(),
        new FeatureSequence2FeatureVector()
    };
    InstanceList instances = new InstanceList(new SerialPipes(stages));
    instances.addThruPipe(new ArrayIterator(africaTraining, "africa"));
    instances.addThruPipe(new ArrayIterator(asiaTraining, "asia"));

    Classifier trained = new NaiveBayesTrainer().train(instances);
    Classification outcome = trained.classify("nelson mandela never eats lions");

    // Identity comparison on purpose: the original test checks with ==, not equals().
    Object predicted = outcome.getLabeling().getBestLabel();
    LabelAlphabet targets = (LabelAlphabet) instances.getTargetAlphabet();
    Object expected = targets.lookupLabel("africa");
    assertTrue(predicted == expected);
}
new TokenSequenceRemoveStopwords (),// Remove stopwords from sequence new TokenSequence2FeatureSequence(),// Replace each Token with a feature index new FeatureSequence2FeatureVector(),// Collapse word order into a "feature vector" new PrintInputAndTarget(), });
new TokenSequenceRemoveStopwords (),// Remove stopwords from sequence new TokenSequence2FeatureSequence(),// Replace each Token with a feature index new FeatureSequence2FeatureVector(),// Collapse word order into a "feature vector" new PrintInputAndTarget(), });
new TokenSequenceRemoveStopwords (),// Remove stopwords from sequence new TokenSequence2FeatureSequence(),// Replace each Token with a feature index new FeatureSequence2FeatureVector(),// Collapse word order into a "feature vector" new PrintInputAndTarget(), });