/**
 * Sequential classification path (the one {@code run} takes when {@code runSequential} is set).
 *
 * <p>Loads the cluster models from {@code clusters}, reads the clustering policy stored under the
 * final clusters path, wraps both in a {@link ClusterClassifier} and hands everything to
 * {@code selectCluster}.
 *
 * @param conf configuration used to load the models and to resolve the final clusters path
 * @param input path forwarded to {@code selectCluster} as the data to classify
 * @param clusters path the cluster models and the policy are loaded from
 * @param output output path forwarded to {@code selectCluster}
 * @param clusterClassificationThreshold forwarded unchanged to {@code selectCluster}
 * @param emitMostLikely forwarded unchanged to {@code selectCluster}
 * @throws IOException if any of the helpers fails with an I/O error
 */
private static void classifyClusterSeq(Configuration conf, Path input, Path clusters, Path output,
    Double clusterClassificationThreshold, boolean emitMostLikely) throws IOException {
  final List<Cluster> models = populateClusterModels(clusters, conf);
  // The policy is read after the models are loaded, same order as before.
  final ClusterClassifier classifier =
      new ClusterClassifier(models, ClusterClassifier.readPolicy(finalClustersPath(conf, clusters)));
  selectCluster(input, models, classifier, output, clusterClassificationThreshold, emitMostLikely);
}
/**
 * Entry point for cluster classification: dispatches to {@code classifyClusterSeq} or
 * {@code classifyClusterMR} depending on {@code runSequential}, passing every other argument
 * through unchanged.
 *
 * @param conf configuration forwarded to the selected implementation
 * @param input path of the data to classify
 * @param clusteringOutputPath path forwarded as the location of the clusters
 * @param output path forwarded as the output location
 * @param clusterClassificationThreshold threshold forwarded to the selected implementation
 * @param emitMostLikely flag forwarded to the selected implementation
 * @param runSequential {@code true} to use {@code classifyClusterSeq}, {@code false} to use
 *        {@code classifyClusterMR}
 * @throws IOException propagated from the selected implementation
 * @throws InterruptedException propagated from the selected implementation
 * @throws ClassNotFoundException propagated from the selected implementation
 */
public static void run(Configuration conf, Path input, Path clusteringOutputPath, Path output,
    double clusterClassificationThreshold, boolean emitMostLikely, boolean runSequential)
    throws IOException, InterruptedException, ClassNotFoundException {
  if (!runSequential) {
    classifyClusterMR(conf, input, clusteringOutputPath, output, clusterClassificationThreshold,
        emitMostLikely);
    return;
  }
  classifyClusterSeq(conf, input, clusteringOutputPath, output, clusterClassificationThreshold,
      emitMostLikely);
}
// Creates a WeightedPropertyVectorWritable through its no-argument constructor.
// NOTE(review): the enclosing method is not visible from here; presumably this is a reusable
// holder that a reader fills record by record -- confirm against the surrounding code.
WeightedPropertyVectorWritable value = new WeightedPropertyVectorWritable();
/**
 * Writes the classification result for a single vector.
 *
 * <p>When {@code emitMostLikely} is set, only one record is written: the vector is wrapped in a
 * {@link WeightedPropertyVectorWritable} weighted by the largest entry of {@code pdfPerCluster},
 * with an empty property map, and passed to {@code write} together with the index of that
 * largest entry. Otherwise the call is delegated to {@code writeAllAboveThreshold}.
 *
 * @param clusterModels cluster models forwarded to the writer helpers
 * @param clusterClassificationThreshold threshold forwarded to {@code writeAllAboveThreshold};
 *        not used on the most-likely path
 * @param emitMostLikely {@code true} to write only the entry with the maximum value
 * @param writer sequence-file writer the helpers write to
 * @param vw the vector being classified
 * @param pdfPerCluster per-cluster values for {@code vw}
 * @throws IOException if a helper fails while writing
 */
private static void classifyAndWrite(List<Cluster> clusterModels, Double clusterClassificationThreshold,
    boolean emitMostLikely, SequenceFile.Writer writer, VectorWritable vw, Vector pdfPerCluster)
    throws IOException {
  if (emitMostLikely) {
    // Only this branch needs a property map, so it is created here rather than on every call.
    Map<Text, Text> props = Maps.newHashMap();
    int maxValueIndex = pdfPerCluster.maxValueIndex();
    WeightedPropertyVectorWritable mostLikely =
        new WeightedPropertyVectorWritable(pdfPerCluster.maxValue(), vw.get(), props);
    write(clusterModels, writer, mostLikely, maxValueIndex);
  } else {
    writeAllAboveThreshold(clusterModels, clusterClassificationThreshold, writer, vw, pdfPerCluster);
  }
}
/**
 * Writes a {@link CanopyClusteringPolicy} to {@code canopies} and then runs
 * {@link ClusterClassificationDriver} over {@code points}.
 *
 * <p>{@code output} is handed to the driver as its clustering path and is also the parent of the
 * {@code PathDirectory.CLUSTERED_POINTS_DIRECTORY} directory passed as the driver's output. The
 * driver's sixth argument is hard-wired to {@code true}.
 *
 * @param conf configuration forwarded to the driver
 * @param points path of the points to classify
 * @param canopies path the clustering policy is written to
 * @param output base output path (see above)
 * @param clusterClassificationThreshold threshold forwarded to the driver
 * @param runSequential forwarded to the driver to select its execution mode
 * @throws IOException propagated from the policy write or the driver
 * @throws InterruptedException propagated from the driver
 * @throws ClassNotFoundException propagated from the driver
 */
private static void clusterData(Configuration conf, Path points, Path canopies, Path output,
    double clusterClassificationThreshold, boolean runSequential)
    throws IOException, InterruptedException, ClassNotFoundException {
  ClusterClassifier.writePolicy(new CanopyClusteringPolicy(), canopies);

  final Path clusteredPoints = new Path(output, PathDirectory.CLUSTERED_POINTS_DIRECTORY);
  final boolean emitMostLikely = true;
  ClusterClassificationDriver.run(conf, points, output, clusteredPoints,
      clusterClassificationThreshold, emitMostLikely, runSequential);
}
/**
 * Runs {@link ClusterClassificationDriver} on the fixture paths with a classification threshold
 * of {@code 0.73} and the driver's sixth argument fixed to {@code true}.
 *
 * @param runSequential forwarded to the driver to select its execution mode
 * @throws IOException propagated from the driver
 * @throws InterruptedException propagated from the driver
 * @throws ClassNotFoundException propagated from the driver
 */
private void runClassificationWithOutlierRemoval(boolean runSequential)
    throws IOException, InterruptedException, ClassNotFoundException {
  final double threshold = 0.73;
  final boolean emitMostLikely = true;
  ClusterClassificationDriver.run(getConfiguration(), pointsPath, clusteringOutputPath,
      classifiedOutputPath, threshold, emitMostLikely, runSequential);
}
/**
 * Invokes {@code computeParameters()} on every model held by the given classifier.
 *
 * @param posterior classifier whose models are updated
 */
@Override
public void close(ClusterClassifier posterior) {
  for (Cluster model : posterior.getModels()) {
    model.computeParameters();
  }
}
/**
 * Runs the first-, second- and third-cluster checks for the run without outlier removal, in that
 * order; each check is delegated to its own helper.
 */
private void assertVectorsWithoutOutlierRemoval() { assertFirstClusterWithoutOutlierRemoval(); assertSecondClusterWithoutOutlierRemoval(); assertThirdClusterWithoutOutlierRemoval(); }
/**
 * Verifies the run with outlier removal by delegating to {@code checkClustersWithOutlierRemoval()}.
 */
private void assertVectorsWithOutlierRemoval() { checkClustersWithOutlierRemoval(); }
/**
 * Sequential classification path (the one {@code run} takes when {@code runSequential} is set).
 *
 * <p>Loads the cluster models from {@code clusters}, reads the clustering policy stored under the
 * final clusters path, wraps both in a {@link ClusterClassifier} and hands everything to
 * {@code selectCluster}.
 *
 * @param conf configuration used to load the models and to resolve the final clusters path
 * @param input path forwarded to {@code selectCluster} as the data to classify
 * @param clusters path the cluster models and the policy are loaded from
 * @param output output path forwarded to {@code selectCluster}
 * @param clusterClassificationThreshold forwarded unchanged to {@code selectCluster}
 * @param emitMostLikely forwarded unchanged to {@code selectCluster}
 * @throws IOException if any of the helpers fails with an I/O error
 */
private static void classifyClusterSeq(Configuration conf, Path input, Path clusters, Path output,
    Double clusterClassificationThreshold, boolean emitMostLikely) throws IOException {
  final List<Cluster> models = populateClusterModels(clusters, conf);
  // The policy is read after the models are loaded, same order as before.
  final ClusterClassifier classifier =
      new ClusterClassifier(models, ClusterClassifier.readPolicy(finalClustersPath(conf, clusters)));
  selectCluster(input, models, classifier, output, clusterClassificationThreshold, emitMostLikely);
}
/**
 * Writes the classification result for a single vector.
 *
 * <p>When {@code emitMostLikely} is set, only one record is written: the vector is wrapped in a
 * {@link WeightedPropertyVectorWritable} weighted by the largest entry of {@code pdfPerCluster},
 * with an empty property map, and passed to {@code write} together with the index of that
 * largest entry. Otherwise the call is delegated to {@code writeAllAboveThreshold}.
 *
 * @param clusterModels cluster models forwarded to the writer helpers
 * @param clusterClassificationThreshold threshold forwarded to {@code writeAllAboveThreshold};
 *        not used on the most-likely path
 * @param emitMostLikely {@code true} to write only the entry with the maximum value
 * @param writer sequence-file writer the helpers write to
 * @param vw the vector being classified
 * @param pdfPerCluster per-cluster values for {@code vw}
 * @throws IOException if a helper fails while writing
 */
private static void classifyAndWrite(List<Cluster> clusterModels, Double clusterClassificationThreshold,
    boolean emitMostLikely, SequenceFile.Writer writer, VectorWritable vw, Vector pdfPerCluster)
    throws IOException {
  if (emitMostLikely) {
    // Only this branch needs a property map, so it is created here rather than on every call.
    Map<Text, Text> props = new HashMap<>();
    int maxValueIndex = pdfPerCluster.maxValueIndex();
    WeightedPropertyVectorWritable mostLikely =
        new WeightedPropertyVectorWritable(pdfPerCluster.maxValue(), vw.get(), props);
    write(clusterModels, writer, mostLikely, maxValueIndex);
  } else {
    writeAllAboveThreshold(clusterModels, clusterClassificationThreshold, writer, vw, pdfPerCluster);
  }
}
/**
 * Writes a {@link CanopyClusteringPolicy} to {@code canopies} and then runs
 * {@link ClusterClassificationDriver} over {@code points}.
 *
 * <p>{@code output} is handed to the driver as its clustering path and is also the parent of the
 * {@code PathDirectory.CLUSTERED_POINTS_DIRECTORY} directory passed as the driver's output. The
 * driver's sixth argument is hard-wired to {@code true}.
 *
 * @param conf configuration forwarded to the driver
 * @param points path of the points to classify
 * @param canopies path the clustering policy is written to
 * @param output base output path (see above)
 * @param clusterClassificationThreshold threshold forwarded to the driver
 * @param runSequential forwarded to the driver to select its execution mode
 * @throws IOException propagated from the policy write or the driver
 * @throws InterruptedException propagated from the driver
 * @throws ClassNotFoundException propagated from the driver
 */
private static void clusterData(Configuration conf, Path points, Path canopies, Path output,
    double clusterClassificationThreshold, boolean runSequential)
    throws IOException, InterruptedException, ClassNotFoundException {
  ClusterClassifier.writePolicy(new CanopyClusteringPolicy(), canopies);

  final Path clusteredPoints = new Path(output, PathDirectory.CLUSTERED_POINTS_DIRECTORY);
  final boolean emitMostLikely = true;
  ClusterClassificationDriver.run(conf, points, output, clusteredPoints,
      clusterClassificationThreshold, emitMostLikely, runSequential);
}
/**
 * Entry point for cluster classification: dispatches to {@code classifyClusterSeq} or
 * {@code classifyClusterMR} depending on {@code runSequential}, passing every other argument
 * through unchanged.
 *
 * @param conf configuration forwarded to the selected implementation
 * @param input path of the data to classify
 * @param clusteringOutputPath path forwarded as the location of the clusters
 * @param output path forwarded as the output location
 * @param clusterClassificationThreshold threshold forwarded to the selected implementation
 * @param emitMostLikely flag forwarded to the selected implementation
 * @param runSequential {@code true} to use {@code classifyClusterSeq}, {@code false} to use
 *        {@code classifyClusterMR}
 * @throws IOException propagated from the selected implementation
 * @throws InterruptedException propagated from the selected implementation
 * @throws ClassNotFoundException propagated from the selected implementation
 */
public static void run(Configuration conf, Path input, Path clusteringOutputPath, Path output,
    double clusterClassificationThreshold, boolean emitMostLikely, boolean runSequential)
    throws IOException, InterruptedException, ClassNotFoundException {
  if (!runSequential) {
    classifyClusterMR(conf, input, clusteringOutputPath, output, clusterClassificationThreshold,
        emitMostLikely);
    return;
  }
  classifyClusterSeq(conf, input, clusteringOutputPath, output, clusterClassificationThreshold,
      emitMostLikely);
}
/**
 * Runs {@link ClusterClassificationDriver} on the fixture paths with a classification threshold
 * of {@code 0.0}, with the driver's last two arguments both fixed to {@code true}.
 *
 * @throws IOException propagated from the driver
 * @throws InterruptedException propagated from the driver
 * @throws ClassNotFoundException propagated from the driver
 */
private void runClassificationWithoutOutlierRemoval()
    throws IOException, InterruptedException, ClassNotFoundException {
  final double threshold = 0.0;
  final boolean emitMostLikely = true;
  final boolean runSequential = true;
  ClusterClassificationDriver.run(getConfiguration(), pointsPath, clusteringOutputPath,
      classifiedOutputPath, threshold, emitMostLikely, runSequential);
}
/**
 * Invokes {@code computeParameters()} on every model held by the given classifier.
 *
 * @param posterior classifier whose models are updated
 */
@Override
public void close(ClusterClassifier posterior) {
  for (Cluster model : posterior.getModels()) {
    model.computeParameters();
  }
}
/**
 * Sequential classification path (the one {@code run} takes when {@code runSequential} is set).
 *
 * <p>Loads the cluster models from {@code clusters}, reads the clustering policy stored under the
 * final clusters path, wraps both in a {@link ClusterClassifier} and hands everything to
 * {@code selectCluster}.
 *
 * @param conf configuration used to load the models and to resolve the final clusters path
 * @param input path forwarded to {@code selectCluster} as the data to classify
 * @param clusters path the cluster models and the policy are loaded from
 * @param output output path forwarded to {@code selectCluster}
 * @param clusterClassificationThreshold forwarded unchanged to {@code selectCluster}
 * @param emitMostLikely forwarded unchanged to {@code selectCluster}
 * @throws IOException if any of the helpers fails with an I/O error
 */
private static void classifyClusterSeq(Configuration conf, Path input, Path clusters, Path output,
    Double clusterClassificationThreshold, boolean emitMostLikely) throws IOException {
  final List<Cluster> models = populateClusterModels(clusters, conf);
  // The policy is read after the models are loaded, same order as before.
  final ClusterClassifier classifier =
      new ClusterClassifier(models, ClusterClassifier.readPolicy(finalClustersPath(conf, clusters)));
  selectCluster(input, models, classifier, output, clusterClassificationThreshold, emitMostLikely);
}
/**
 * Writes the classification result for a single vector.
 *
 * <p>When {@code emitMostLikely} is set, only one record is written: the vector is wrapped in a
 * {@link WeightedPropertyVectorWritable} weighted by the largest entry of {@code pdfPerCluster},
 * with an empty property map, and passed to {@code write} together with the index of that
 * largest entry. Otherwise the call is delegated to {@code writeAllAboveThreshold}.
 *
 * @param clusterModels cluster models forwarded to the writer helpers
 * @param clusterClassificationThreshold threshold forwarded to {@code writeAllAboveThreshold};
 *        not used on the most-likely path
 * @param emitMostLikely {@code true} to write only the entry with the maximum value
 * @param writer sequence-file writer the helpers write to
 * @param vw the vector being classified
 * @param pdfPerCluster per-cluster values for {@code vw}
 * @throws IOException if a helper fails while writing
 */
private static void classifyAndWrite(List<Cluster> clusterModels, Double clusterClassificationThreshold,
    boolean emitMostLikely, SequenceFile.Writer writer, VectorWritable vw, Vector pdfPerCluster)
    throws IOException {
  if (emitMostLikely) {
    // Only this branch needs a property map, so it is created here rather than on every call.
    Map<Text, Text> props = Maps.newHashMap();
    int maxValueIndex = pdfPerCluster.maxValueIndex();
    WeightedPropertyVectorWritable mostLikely =
        new WeightedPropertyVectorWritable(pdfPerCluster.maxValue(), vw.get(), props);
    write(clusterModels, writer, mostLikely, maxValueIndex);
  } else {
    writeAllAboveThreshold(clusterModels, clusterClassificationThreshold, writer, vw, pdfPerCluster);
  }
}
/**
 * Writes a {@link CanopyClusteringPolicy} to {@code canopies} and then runs
 * {@link ClusterClassificationDriver} over {@code points}.
 *
 * <p>{@code output} is handed to the driver as its clustering path and is also the parent of the
 * {@code PathDirectory.CLUSTERED_POINTS_DIRECTORY} directory passed as the driver's output. The
 * driver's sixth argument is hard-wired to {@code true}.
 *
 * @param conf configuration forwarded to the driver
 * @param points path of the points to classify
 * @param canopies path the clustering policy is written to
 * @param output base output path (see above)
 * @param clusterClassificationThreshold threshold forwarded to the driver
 * @param runSequential forwarded to the driver to select its execution mode
 * @throws IOException propagated from the policy write or the driver
 * @throws InterruptedException propagated from the driver
 * @throws ClassNotFoundException propagated from the driver
 */
private static void clusterData(Configuration conf, Path points, Path canopies, Path output,
    double clusterClassificationThreshold, boolean runSequential)
    throws IOException, InterruptedException, ClassNotFoundException {
  ClusterClassifier.writePolicy(new CanopyClusteringPolicy(), canopies);

  final Path clusteredPoints = new Path(output, PathDirectory.CLUSTERED_POINTS_DIRECTORY);
  final boolean emitMostLikely = true;
  ClusterClassificationDriver.run(conf, points, output, clusteredPoints,
      clusterClassificationThreshold, emitMostLikely, runSequential);
}
/**
 * Entry point for cluster classification: dispatches to {@code classifyClusterSeq} or
 * {@code classifyClusterMR} depending on {@code runSequential}, passing every other argument
 * through unchanged.
 *
 * @param conf configuration forwarded to the selected implementation
 * @param input path of the data to classify
 * @param clusteringOutputPath path forwarded as the location of the clusters
 * @param output path forwarded as the output location
 * @param clusterClassificationThreshold threshold forwarded to the selected implementation
 * @param emitMostLikely flag forwarded to the selected implementation
 * @param runSequential {@code true} to use {@code classifyClusterSeq}, {@code false} to use
 *        {@code classifyClusterMR}
 * @throws IOException propagated from the selected implementation
 * @throws InterruptedException propagated from the selected implementation
 * @throws ClassNotFoundException propagated from the selected implementation
 */
public static void run(Configuration conf, Path input, Path clusteringOutputPath, Path output,
    double clusterClassificationThreshold, boolean emitMostLikely, boolean runSequential)
    throws IOException, InterruptedException, ClassNotFoundException {
  if (!runSequential) {
    classifyClusterMR(conf, input, clusteringOutputPath, output, clusterClassificationThreshold,
        emitMostLikely);
    return;
  }
  classifyClusterSeq(conf, input, clusteringOutputPath, output, clusterClassificationThreshold,
      emitMostLikely);
}
/**
 * Invokes {@code computeParameters()} on every model held by the given classifier.
 *
 * @param posterior classifier whose models are updated
 */
@Override
public void close(ClusterClassifier posterior) {
  for (Cluster model : posterior.getModels()) {
    model.computeParameters();
  }
}