/**
 * Construct a classifier pipe from a collection of stopwords.
 *
 * <p>The caller supplies only the stopwords; no label set is passed in.
 * A newly created {@code Target2Label} and the stopwords are handed to the
 * superclass constructor, which is not visible here.
 *
 * @param stopwords to be removed (the removal itself is presumably done by
 *                  the superclass; confirm against its constructor)
 */
public ClassifierPipe(Collection<String> stopwords) {
    super(new Target2Label(), stopwords);
}
} // closes the enclosing class, whose header is outside this snippet
/**
 * Replaces the carrier's target with the entry obtained from the target
 * alphabet's {@code lookupLabel} for that target.
 *
 * <p>An instance without a target is returned untouched.
 *
 * @param carrier the instance whose target is converted in place
 * @return the same {@code carrier} object
 * @throws IllegalArgumentException if the target is already a {@code Label}
 */
public Instance pipe (Instance carrier) {
    // Nothing to convert when the instance carries no target.
    if (carrier.getTarget() == null) {
        return carrier;
    }

    // Converting twice is treated as a caller error.
    if (carrier.getTarget() instanceof Label) {
        throw new IllegalArgumentException ("Already a label.");
    }

    LabelAlphabet labels = (LabelAlphabet) getTargetAlphabet();
    carrier.setTarget(labels.lookupLabel (carrier.getTarget()));
    return carrier;
}
private Pipe makePipe() { Alphabet alpha = new Alphabet(); Target2Label labelPipe = new Target2Label(); LabelAlphabet labelAlpha = (LabelAlphabet) labelPipe.getTargetAlphabet(); return new SerialPipes(ImmutableList.of( new AlignToStressPipe(alpha, labelAlpha, ImmutableList.<StressFeature>of() ), // convert to token sequence new TokenSequenceLowercase(), // make all lowercase new NeighborTokenFeature(true, makeNeighbors()), // grab neighboring graphemes new SurroundingTokenFeature(false), new SurroundingTokenFeature(true), new NeighborShapeFeature(true, makeShapeNeighs()), new LeadingTrailingFeature(), new TokenSequenceToFeature(), // convert the strings in the text to features new TokenSequence2FeatureVectorSequence(alpha, true, false), labelPipe )); }
/**
 * Construct a classifier pipe from a collection of stopwords.
 *
 * <p>The caller supplies only the stopwords; no label set is passed in.
 * A newly created {@code Target2Label} and the stopwords are handed to the
 * superclass constructor, which is not visible here.
 *
 * @param stopwords to be removed (the removal itself is presumably done by
 *                  the superclass; confirm against its constructor)
 */
public ClassifierPipe(Collection<String> stopwords) {
    super(new Target2Label(), stopwords);
}
} // closes the enclosing class, whose header is outside this snippet
/**
 * Replaces the carrier's target with the entry obtained from the target
 * alphabet's {@code lookupLabel} for that target.
 *
 * <p>An instance without a target is returned untouched.
 *
 * @param carrier the instance whose target is converted in place
 * @return the same {@code carrier} object
 * @throws IllegalArgumentException if the target is already a {@code Label}
 */
public Instance pipe (Instance carrier) {
    // Nothing to convert when the instance carries no target.
    if (carrier.getTarget() == null) {
        return carrier;
    }

    // Converting twice is treated as a caller error.
    if (carrier.getTarget() instanceof Label) {
        throw new IllegalArgumentException ("Already a label.");
    }

    LabelAlphabet labels = (LabelAlphabet) getTargetAlphabet();
    carrier.setTarget(labels.lookupLabel (carrier.getTarget()));
    return carrier;
}
static List<Pipe> getPipes() { List<Pipe> pipes = newArrayList(); pipes.add(new Target2Label()); pipes.add(new MyInput2RegexTokens()); // pipes.add(new PrintInputAndTarget()); pipes.add(new TokenSequence2FeatureSequence()); pipes.add(new FeatureSequence2FeatureVector()); return pipes; }
/**
 * Replaces the carrier's target with the entry obtained from the target
 * alphabet's {@code lookupLabel} for that target.
 *
 * <p>An instance without a target is returned untouched.
 *
 * @param carrier the instance whose target is converted in place
 * @return the same {@code carrier} object
 * @throws IllegalArgumentException if the target is already a {@code Label}
 */
public Instance pipe (Instance carrier) {
    // Nothing to convert when the instance carries no target.
    if (carrier.getTarget() == null) {
        return carrier;
    }

    // Converting twice is treated as a caller error.
    if (carrier.getTarget() instanceof Label) {
        throw new IllegalArgumentException ("Already a label.");
    }

    LabelAlphabet labels = (LabelAlphabet) getTargetAlphabet();
    carrier.setTarget(labels.lookupLabel (carrier.getTarget()));
    return carrier;
}
/**
 * Creates an {@code InstanceList} backed by a {@code Target2Label} +
 * {@code Csv2FeatureVector} pipeline and fills it from the given file.
 *
 * <p>The file reader is closed in a {@code finally} block, so the file handle
 * is released even when reading or piping the data fails part-way through.
 *
 * @param dataFile the file to read instances from
 * @return the populated instance list
 * @throws IOException if the file cannot be opened or closed
 */
public InstanceList createInstanceList(File dataFile) throws IOException {
    InstanceList instanceList = new InstanceList(new SerialPipes(new Pipe[] {
        new Target2Label(),
        new Csv2FeatureVector()
    }));

    Reader fileReader = new FileReader(dataFile);
    try {
        instanceList.addThruPipe(new DataIterator(fileReader));
    } finally {
        // Previously skipped when addThruPipe threw, leaking the file handle.
        fileReader.close();
    }
    return instanceList;
}
/**
 * Creates an {@code InstanceList} backed by a {@code Target2Label} +
 * {@code Csv2FeatureVector} pipeline and fills it from the given file.
 *
 * <p>The file reader is closed in a {@code finally} block, so the file handle
 * is released even when reading or piping the data fails part-way through.
 *
 * @param dataFile the file to read instances from
 * @return the populated instance list
 * @throws IOException if the file cannot be opened or closed
 */
public InstanceList createInstanceList(File dataFile) throws IOException {
    InstanceList instanceList = new InstanceList(new SerialPipes(new Pipe[] {
        new Target2Label(),
        new Csv2FeatureVector()
    }));

    Reader fileReader = new FileReader(dataFile);
    try {
        instanceList.addThruPipe(new DataIterator(fileReader));
    } finally {
        // Previously skipped when addThruPipe threw, leaking the file handle.
        fileReader.close();
    }
    return instanceList;
}
/**
 * Runs {@code testRandomTrainedOn} once with a plain {@code InstanceList} and
 * once with a {@code PagedInstanceList}, both built on the same pipe, and
 * asserts that the two returned values agree to within 0.01.
 */
public void testRandomTrained () {
    Pipe[] stages = {
        new TokenSequence2FeatureSequence (),
        new FeatureSequence2FeatureVector (),
        new Target2Label()
    };
    Pipe sharedPipe = new SerialPipes (stages);

    // Plain list first, then the paged variant, in the same order as before.
    double plainResult = testRandomTrainedOn (new InstanceList (sharedPipe));

    File currentDir = new File(".");
    double pagedResult = testRandomTrainedOn (
        new PagedInstanceList (sharedPipe, 700, 200, currentDir));

    assertEquals (plainResult, pagedResult, 0.01);
}
/**
 * Runs {@code testRandomTrainedOn} once with a plain {@code InstanceList} and
 * once with a {@code PagedInstanceList}, both built on the same pipe, and
 * asserts that the two returned values agree to within 0.01.
 */
public void testRandomTrained () {
    Pipe[] stages = {
        new TokenSequence2FeatureSequence (),
        new FeatureSequence2FeatureVector (),
        new Target2Label()
    };
    Pipe sharedPipe = new SerialPipes (stages);

    // Plain list first, then the paged variant, in the same order as before.
    double plainResult = testRandomTrainedOn (new InstanceList (sharedPipe));

    File currentDir = new File(".");
    double pagedResult = testRandomTrainedOn (
        new PagedInstanceList (sharedPipe, 700, 200, currentDir));

    assertEquals (plainResult, pagedResult, 0.01);
}
/** * Creates a list consisting of randomly-generated * <code>FeatureVector</code>s. */ // xxx Perhaps split these out into a utility class public InstanceList (Randoms r, // the generator of all random-ness used here Dirichlet classCentroidDistribution, // includes a Alphabet double classCentroidAverageAlphaMean, // Gaussian mean on the sum of alphas double classCentroidAverageAlphaVariance, // Gaussian variance on the sum of alphas double featureVectorSizePoissonLambda, double classInstanceCountPoissonLambda, String[] classNames) { this (new SerialPipes (new Pipe[] { new TokenSequence2FeatureSequence (), new FeatureSequence2FeatureVector (), new Target2Label()})); //classCentroidDistribution.print(); Iterator<Instance> iter = new RandomTokenSequenceIterator ( r, classCentroidDistribution, classCentroidAverageAlphaMean, classCentroidAverageAlphaVariance, featureVectorSizePoissonLambda, classInstanceCountPoissonLambda, classNames); this.addThruPipe (iter); }
/** * Creates a list consisting of randomly-generated * <code>FeatureVector</code>s. */ // xxx Perhaps split these out into a utility class public InstanceList (Randoms r, // the generator of all random-ness used here Dirichlet classCentroidDistribution, // includes a Alphabet double classCentroidAverageAlphaMean, // Gaussian mean on the sum of alphas double classCentroidAverageAlphaVariance, // Gaussian variance on the sum of alphas double featureVectorSizePoissonLambda, double classInstanceCountPoissonLambda, String[] classNames) { this (new SerialPipes (new Pipe[] { new TokenSequence2FeatureSequence (), new FeatureSequence2FeatureVector (), new Target2Label()})); //classCentroidDistribution.print(); Iterator<Instance> iter = new RandomTokenSequenceIterator ( r, classCentroidDistribution, classCentroidAverageAlphaMean, classCentroidAverageAlphaVariance, featureVectorSizePoissonLambda, classInstanceCountPoissonLambda, classNames); this.addThruPipe (iter); }
/** * Creates a list consisting of randomly-generated * <code>FeatureVector</code>s. */ // xxx Perhaps split these out into a utility class public InstanceList (Randoms r, // the generator of all random-ness used here Dirichlet classCentroidDistribution, // includes a Alphabet double classCentroidAverageAlphaMean, // Gaussian mean on the sum of alphas double classCentroidAverageAlphaVariance, // Gaussian variance on the sum of alphas double featureVectorSizePoissonLambda, double classInstanceCountPoissonLambda, String[] classNames) { this (new SerialPipes (new Pipe[] { new TokenSequence2FeatureSequence (), new FeatureSequence2FeatureVector (), new Target2Label()})); //classCentroidDistribution.print(); Iterator<Instance> iter = new RandomTokenSequenceIterator ( r, classCentroidDistribution, classCentroidAverageAlphaMean, classCentroidAverageAlphaVariance, featureVectorSizePoissonLambda, classInstanceCountPoissonLambda, classNames); this.addThruPipe (iter); }
/**
 * Builds an {@code InstanceList} over a six-stage text pipeline and feeds it
 * from a {@code FileIterator} rooted at {@code foo/bar}.
 *
 * <p>The method makes no assertions; it only exercises construction and
 * {@code addThruPipe}.
 */
public void testThree () {
    Pipe[] stages = {
        new Target2Label (),
        new CharSequence2TokenSequence (),
        new TokenSequenceLowercase (),
        new TokenSequenceRemoveStopwords (),
        new TokenSequence2FeatureSequence (),
        new FeatureSequence2FeatureVector ()
    };
    InstanceList il = new InstanceList (new SerialPipes (stages));

    // Captures everything before the first '/' at the start of the input.
    Pattern leadingSegment = Pattern.compile("^([^/]*)/");
    Iterator<Instance> files = new FileIterator (new File("foo/bar"), null, leadingSegment);
    il.addThruPipe (files);
}
// Wrap three stages in a SerialPipes: Target2Label, TokenSequence2FeatureSequence
// and FeatureSequence2FeatureVector. `labels` and `features` are defined outside
// this snippet (presumably the label and feature alphabets; confirm at the
// declaration site).
Pipe instancePipe = new SerialPipes (new Pipe[] {
    new Target2Label(labels),
    new TokenSequence2FeatureSequence(features),
    new FeatureSequence2FeatureVector()});
/**
 * Builds an {@code InstanceList} over a six-stage text pipeline and feeds it
 * from a {@code FileIterator} rooted at {@code foo/bar}.
 *
 * <p>The method makes no assertions; it only exercises construction and
 * {@code addThruPipe}.
 */
public void testThree () {
    Pipe[] stages = {
        new Target2Label (),
        new CharSequence2TokenSequence (),
        new TokenSequenceLowercase (),
        new TokenSequenceRemoveStopwords (),
        new TokenSequence2FeatureSequence (),
        new FeatureSequence2FeatureVector ()
    };
    InstanceList il = new InstanceList (new SerialPipes (stages));

    // Captures everything before the first '/' at the start of the input.
    Pattern leadingSegment = Pattern.compile("^([^/]*)/");
    Iterator<Instance> files = new FileIterator (new File("foo/bar"), null, leadingSegment);
    il.addThruPipe (files);
}
// Wrap three stages in a SerialPipes: Target2Label, TokenSequence2FeatureSequence
// and FeatureSequence2FeatureVector. `labels` and `features` are defined outside
// this snippet (presumably the label and feature alphabets; confirm at the
// declaration site).
Pipe instancePipe = new SerialPipes (new Pipe[] {
    new Target2Label(labels),
    new TokenSequence2FeatureSequence(features),
    new FeatureSequence2FeatureVector()});
// Wrap three stages in a SerialPipes: Target2Label, TokenSequence2FeatureSequence
// and FeatureSequence2FeatureVector. `labels` and `features` are defined outside
// this snippet (presumably the label and feature alphabets; confirm at the
// declaration site).
Pipe instancePipe = new SerialPipes (new Pipe[] {
    new Target2Label(labels),
    new TokenSequence2FeatureSequence(features),
    new FeatureSequence2FeatureVector()});
// Add a newly created Target2Label stage to pipeList (declared outside this snippet).
pipeList.add(new Target2Label());