/** * Mapper which classifies the vectors to respective clusters. */ @Override protected void map(WritableComparable<?> key, VectorWritable vw, Context context) throws IOException, InterruptedException { if (!clusterModels.isEmpty()) { // Converting to NamedVectors to preserve the vectorId else its not obvious as to which point // belongs to which cluster - fix for MAHOUT-1410 Class<? extends Vector> vectorClass = vw.get().getClass(); Vector vector = vw.get(); if (!vectorClass.equals(NamedVector.class)) { if (key.getClass().equals(Text.class)) { vector = new NamedVector(vector, key.toString()); } else if (key.getClass().equals(IntWritable.class)) { vector = new NamedVector(vector, Integer.toString(((IntWritable) key).get())); } } Vector pdfPerCluster = clusterClassifier.classify(vector); if (shouldClassify(pdfPerCluster)) { if (emitMostLikely) { int maxValueIndex = pdfPerCluster.maxValueIndex(); write(new VectorWritable(vector), context, maxValueIndex, 1.0); } else { writeAllAboveThreshold(new VectorWritable(vector), context, pdfPerCluster); } } } }
/** * Mapper which classifies the vectors to respective clusters. */ @Override protected void map(WritableComparable<?> key, VectorWritable vw, Context context) throws IOException, InterruptedException { if (!clusterModels.isEmpty()) { // Converting to NamedVectors to preserve the vectorId else its not obvious as to which point // belongs to which cluster - fix for MAHOUT-1410 Class<? extends Vector> vectorClass = vw.get().getClass(); Vector vector = vw.get(); if (!vectorClass.equals(NamedVector.class)) { if (key.getClass().equals(Text.class)) { vector = new NamedVector(vector, key.toString()); } else if (key.getClass().equals(IntWritable.class)) { vector = new NamedVector(vector, Integer.toString(((IntWritable) key).get())); } } Vector pdfPerCluster = clusterClassifier.classify(vector); if (shouldClassify(pdfPerCluster)) { if (emitMostLikely) { int maxValueIndex = pdfPerCluster.maxValueIndex(); write(new VectorWritable(vector), context, maxValueIndex, 1.0); } else { writeAllAboveThreshold(new VectorWritable(vector), context, pdfPerCluster); } } } }
/** * Mapper which classifies the vectors to respective clusters. */ @Override protected void map(WritableComparable<?> key, VectorWritable vw, Context context) throws IOException, InterruptedException { if (!clusterModels.isEmpty()) { // Converting to NamedVectors to preserve the vectorId else its not obvious as to which point // belongs to which cluster - fix for MAHOUT-1410 Class<? extends Vector> vectorClass = vw.get().getClass(); Vector vector = vw.get(); if (!vectorClass.equals(NamedVector.class)) { if (key.getClass().equals(Text.class)) { vector = new NamedVector(vector, key.toString()); } else if (key.getClass().equals(IntWritable.class)) { vector = new NamedVector(vector, Integer.toString(((IntWritable) key).get())); } } Vector pdfPerCluster = clusterClassifier.classify(vector); if (shouldClassify(pdfPerCluster)) { if (emitMostLikely) { int maxValueIndex = pdfPerCluster.maxValueIndex(); write(new VectorWritable(vector), context, maxValueIndex, 1.0); } else { writeAllAboveThreshold(new VectorWritable(vector), context, pdfPerCluster); } } } }
// Excerpt: classify the vector, then, only when shouldClassify accepts the result for
// clusterClassificationThreshold, hand the wrapped vector and the result to classifyAndWrite.
// NOTE(review): the closing brace of this `if` is not part of the excerpt.
Vector pdfPerCluster = clusterClassifier.classify(vector); if (shouldClassify(pdfPerCluster, clusterClassificationThreshold)) { classifyAndWrite(clusterModels, clusterClassificationThreshold, emitMostLikely, writer, new VectorWritable(vector), pdfPerCluster);
// Excerpt: classify the vector, then, only when shouldClassify accepts the result for
// clusterClassificationThreshold, hand the wrapped vector and the result to classifyAndWrite.
// NOTE(review): the closing brace of this `if` is not part of the excerpt.
Vector pdfPerCluster = clusterClassifier.classify(vector); if (shouldClassify(pdfPerCluster, clusterClassificationThreshold)) { classifyAndWrite(clusterModels, clusterClassificationThreshold, emitMostLikely, writer, new VectorWritable(vector), pdfPerCluster);
// Excerpt: classify the vector, then, only when shouldClassify accepts the result for
// clusterClassificationThreshold, hand the wrapped vector and the result to classifyAndWrite.
// NOTE(review): the closing brace of this `if` is not part of the excerpt.
Vector pdfPerCluster = clusterClassifier.classify(vector); if (shouldClassify(pdfPerCluster, clusterClassificationThreshold)) { classifyAndWrite(clusterModels, clusterClassificationThreshold, emitMostLikely, writer, new VectorWritable(vector), pdfPerCluster);
/**
 * Classifies the incoming vector, lets the policy select from the classification result, and
 * trains the classifier once for every non-zero selection.
 *
 * @param key record key; not used by this method
 * @param value wrapper holding the vector to classify and train with
 * @param context mapper context; not used by this method
 * @throws IOException as declared by the overridden method
 * @throws InterruptedException as declared by the overridden method
 */
@Override
protected void map(WritableComparable<?> key, VectorWritable value, Context context)
    throws IOException, InterruptedException {
  Vector chosen = policy.select(classifier.classify(value.get()));
  for (Element selection : chosen.nonZeroes()) {
    // The element index is passed as the first train() argument, its value as the last.
    int modelIndex = selection.index();
    Vector observation = value.get();
    double weight = selection.get();
    classifier.train(modelIndex, observation, weight);
  }
}
/**
 * Classifies the incoming vector, lets the policy select from the classification result, and
 * trains the classifier once for every non-zero selection.
 *
 * @param key record key; not used by this method
 * @param value wrapper holding the vector to classify and train with
 * @param context mapper context; not used by this method
 * @throws IOException as declared by the overridden method
 * @throws InterruptedException as declared by the overridden method
 */
@Override
protected void map(WritableComparable<?> key, VectorWritable value, Context context)
    throws IOException, InterruptedException {
  Vector chosen = policy.select(classifier.classify(value.get()));
  for (Element selection : chosen.nonZeroes()) {
    // The element index is passed as the first train() argument, its value as the last.
    int modelIndex = selection.index();
    Vector observation = value.get();
    double weight = selection.get();
    classifier.train(modelIndex, observation, weight);
  }
}
/**
 * Classifies the incoming vector, lets the policy select from the classification result, and
 * trains the classifier once for every non-zero selection.
 *
 * @param key record key; not used by this method
 * @param value wrapper holding the vector to classify and train with
 * @param context mapper context; not used by this method
 * @throws IOException as declared by the overridden method
 * @throws InterruptedException as declared by the overridden method
 */
@Override
protected void map(WritableComparable<?> key, VectorWritable value, Context context)
    throws IOException, InterruptedException {
  Vector chosen = policy.select(classifier.classify(value.get()));
  for (Element selection : chosen.nonZeroes()) {
    // The element index is passed as the first train() argument, its value as the last.
    int modelIndex = selection.index();
    Vector observation = value.get();
    double weight = selection.get();
    classifier.train(modelIndex, observation, weight);
  }
}
// Excerpt: unwrap the vector from its writable once, then classify it.
Vector vector = vw.get(); Vector probabilities = classifier.classify(vector);
// Excerpt: unwrap the vector from its writable once, then classify it.
Vector vector = vw.get(); Vector probabilities = classifier.classify(vector);
// Excerpt: unwrap the vector from its writable once, then classify it.
Vector vector = vw.get(); Vector probabilities = classifier.classify(vector);
/**
 * Checks the formatted classification result of the classifier built by
 * {@code newDMClassifier()} for the points [0,0] and [2,2].
 */
@Test
public void testDMClusterClassification() {
  ClusterClassifier dmClassifier = newDMClassifier();

  // A freshly created DenseVector(2) is used as the [0,0] point.
  Vector origin = new DenseVector(2);
  String originPdf = AbstractCluster.formatVector(dmClassifier.classify(origin), null);
  assertEquals("[0,0]", "[0.2,0.6,0.2]", originPdf);

  // assign(2) sets every component to 2, giving the [2,2] point.
  Vector twos = new DenseVector(2).assign(2);
  String twosPdf = AbstractCluster.formatVector(dmClassifier.classify(twos), null);
  assertEquals("[2,2]", "[0.493,0.296,0.211]", twosPdf);
}
/**
 * Checks the formatted classification result of the classifier built by
 * {@code newSoftClusterClassifier()} for the points [0,0] and [2,2].
 */
@Test
public void testSoftClusterClassification() {
  ClusterClassifier softClassifier = newSoftClusterClassifier();

  // A freshly created DenseVector(2) is used as the [0,0] point.
  Vector origin = new DenseVector(2);
  String originPdf = AbstractCluster.formatVector(softClassifier.classify(origin), null);
  assertEquals("[0,0]", "[0.0,1.0,0.0]", originPdf);

  // assign(2) sets every component to 2, giving the [2,2] point.
  Vector twos = new DenseVector(2).assign(2);
  String twosPdf = AbstractCluster.formatVector(softClassifier.classify(twos), null);
  assertEquals("[2,2]", "[0.735,0.184,0.082]", twosPdf);
}
/**
 * Checks the formatted classification result of the classifier built by
 * {@code newKlusterClassifier()} for the points [0,0] and [2,2].
 */
@Test
public void testClusterClassification() {
  ClusterClassifier klusterClassifier = newKlusterClassifier();

  // A freshly created DenseVector(2) is used as the [0,0] point.
  Vector origin = new DenseVector(2);
  String originPdf = AbstractCluster.formatVector(klusterClassifier.classify(origin), null);
  assertEquals("[0,0]", "[0.2,0.6,0.2]", originPdf);

  // assign(2) sets every component to 2, giving the [2,2] point.
  Vector twos = new DenseVector(2).assign(2);
  String twosPdf = AbstractCluster.formatVector(klusterClassifier.classify(twos), null);
  assertEquals("[2,2]", "[0.493,0.296,0.211]", twosPdf);
}
/**
 * Checks the formatted classification result of the classifier built by
 * {@code newCosineKlusterClassifier()} for the points [0,0] and [2,2].
 */
@Test
public void testCosineKlusterClassification() {
  ClusterClassifier cosineClassifier = newCosineKlusterClassifier();

  // A freshly created DenseVector(2) is used as the [0,0] point.
  Vector origin = new DenseVector(2);
  String originPdf = AbstractCluster.formatVector(cosineClassifier.classify(origin), null);
  assertEquals("[0,0]", "[0.333,0.333,0.333]", originPdf);

  // assign(2) sets every component to 2, giving the [2,2] point.
  Vector twos = new DenseVector(2).assign(2);
  String twosPdf = AbstractCluster.formatVector(cosineClassifier.classify(twos), null);
  assertEquals("[2,2]", "[0.429,0.429,0.143]", twosPdf);
}
}
// Excerpt: classify the already-unwrapped vector.
Vector probabilities = classifier.classify(vector);
// Excerpt: classify the already-unwrapped vector.
Vector probabilities = classifier.classify(vector);
// Excerpt: classify the already-unwrapped vector.
Vector probabilities = classifier.classify(vector);