/**
 * Writes a {@code CanopyClusteringPolicy} to {@code canopies} and then invokes
 * {@code ClusterClassificationDriver.run} on the supplied points.
 *
 * @param conf
 *          configuration forwarded unchanged to the classification driver
 * @param points
 *          path handed to the driver as its input (second argument)
 * @param canopies
 *          path the clustering policy is written to
 * @param output
 *          path handed to the driver as its third argument; it is also the parent of the
 *          {@code PathDirectory.CLUSTERED_POINTS_DIRECTORY} path given as the fourth argument
 * @param clusterClassificationThreshold
 *          threshold forwarded unchanged to the classification driver
 * @param runSequential
 *          flag forwarded unchanged to the classification driver
 * @throws IOException
 *           propagated from the calls made here
 * @throws InterruptedException
 *           propagated from the calls made here
 * @throws ClassNotFoundException
 *           propagated from the calls made here
 */
private static void clusterData(Configuration conf,
                                Path points,
                                Path canopies,
                                Path output,
                                double clusterClassificationThreshold,
                                boolean runSequential)
    throws IOException, InterruptedException, ClassNotFoundException {
  ClusterClassifier.writePolicy(new CanopyClusteringPolicy(), canopies);

  // Built only after the policy has been written, so the order of side effects is unchanged.
  Path clusteredPointsDir = new Path(output, PathDirectory.CLUSTERED_POINTS_DIRECTORY);
  // Sixth driver argument; the name is an assumption - confirm against the driver's signature.
  boolean emitMostLikely = true;

  ClusterClassificationDriver.run(
      conf,
      points,
      output,
      clusteredPointsDir,
      clusterClassificationThreshold,
      emitMostLikely,
      runSequential);
}
/**
 * Writes a {@code CanopyClusteringPolicy} to {@code canopies} and then invokes
 * {@code ClusterClassificationDriver.run} on the supplied points.
 *
 * @param conf
 *          configuration forwarded unchanged to the classification driver
 * @param points
 *          path handed to the driver as its input (second argument)
 * @param canopies
 *          path the clustering policy is written to
 * @param output
 *          path handed to the driver as its third argument; it is also the parent of the
 *          {@code PathDirectory.CLUSTERED_POINTS_DIRECTORY} path given as the fourth argument
 * @param clusterClassificationThreshold
 *          threshold forwarded unchanged to the classification driver
 * @param runSequential
 *          flag forwarded unchanged to the classification driver
 * @throws IOException
 *           propagated from the calls made here
 * @throws InterruptedException
 *           propagated from the calls made here
 * @throws ClassNotFoundException
 *           propagated from the calls made here
 */
private static void clusterData(Configuration conf,
                                Path points,
                                Path canopies,
                                Path output,
                                double clusterClassificationThreshold,
                                boolean runSequential)
    throws IOException, InterruptedException, ClassNotFoundException {
  ClusterClassifier.writePolicy(new CanopyClusteringPolicy(), canopies);

  // Built only after the policy has been written, so the order of side effects is unchanged.
  Path clusteredPointsDir = new Path(output, PathDirectory.CLUSTERED_POINTS_DIRECTORY);
  // Sixth driver argument; the name is an assumption - confirm against the driver's signature.
  boolean emitMostLikely = true;

  ClusterClassificationDriver.run(
      conf,
      points,
      output,
      clusteredPointsDir,
      clusterClassificationThreshold,
      emitMostLikely,
      runSequential);
}
/**
 * Writes a {@code CanopyClusteringPolicy} to {@code canopies} and then invokes
 * {@code ClusterClassificationDriver.run} on the supplied points.
 *
 * @param conf
 *          configuration forwarded unchanged to the classification driver
 * @param points
 *          path handed to the driver as its input (second argument)
 * @param canopies
 *          path the clustering policy is written to
 * @param output
 *          path handed to the driver as its third argument; it is also the parent of the
 *          {@code PathDirectory.CLUSTERED_POINTS_DIRECTORY} path given as the fourth argument
 * @param clusterClassificationThreshold
 *          threshold forwarded unchanged to the classification driver
 * @param runSequential
 *          flag forwarded unchanged to the classification driver
 * @throws IOException
 *           propagated from the calls made here
 * @throws InterruptedException
 *           propagated from the calls made here
 * @throws ClassNotFoundException
 *           propagated from the calls made here
 */
private static void clusterData(Configuration conf,
                                Path points,
                                Path canopies,
                                Path output,
                                double clusterClassificationThreshold,
                                boolean runSequential)
    throws IOException, InterruptedException, ClassNotFoundException {
  ClusterClassifier.writePolicy(new CanopyClusteringPolicy(), canopies);

  // Built only after the policy has been written, so the order of side effects is unchanged.
  Path clusteredPointsDir = new Path(output, PathDirectory.CLUSTERED_POINTS_DIRECTORY);
  // Sixth driver argument; the name is an assumption - confirm against the driver's signature.
  boolean emitMostLikely = true;

  ClusterClassificationDriver.run(
      conf,
      points,
      output,
      clusteredPointsDir,
      clusterClassificationThreshold,
      emitMostLikely,
      runSequential);
}
/**
 * Runs the clustering (classification) step with the given arguments: logs the paths involved,
 * writes a {@code KMeansClusteringPolicy} to {@code clustersIn}, and invokes
 * {@code ClusterClassificationDriver.run}.
 *
 * @param conf
 *          configuration forwarded unchanged to the classification driver
 * @param input
 *          the directory pathname for input points
 * @param clustersIn
 *          the directory pathname for input clusters; the clustering policy is written here
 * @param output
 *          the directory pathname for output points; the clustered points are directed to its
 *          {@code PathDirectory.CLUSTERED_POINTS_DIRECTORY} child
 * @param clusterClassificationThreshold
 *          clustering strictness / outlier removal parameter, expected to lie between 0 and 1;
 *          vectors whose pdf is below this value will not be clustered
 * @param runSequential
 *          if true execute sequential algorithm
 * @throws IOException
 *           propagated from the calls made here
 * @throws InterruptedException
 *           propagated from the calls made here
 * @throws ClassNotFoundException
 *           propagated from the calls made here
 */
public static void clusterData(Configuration conf,
                               Path input,
                               Path clustersIn,
                               Path output,
                               double clusterClassificationThreshold,
                               boolean runSequential)
    throws IOException, InterruptedException, ClassNotFoundException {
  if (log.isInfoEnabled()) {
    log.info("Running Clustering");
    log.info("Input: {} Clusters In: {} Out: {}", input, clustersIn, output);
  }

  ClusterClassifier.writePolicy(new KMeansClusteringPolicy(), clustersIn);

  // Built only after the policy has been written, so the order of side effects is unchanged.
  Path clusteredPointsDir = new Path(output, PathDirectory.CLUSTERED_POINTS_DIRECTORY);
  // Sixth driver argument; the name is an assumption - confirm against the driver's signature.
  boolean emitMostLikely = true;

  ClusterClassificationDriver.run(
      conf,
      input,
      output,
      clusteredPointsDir,
      clusterClassificationThreshold,
      emitMostLikely,
      runSequential);
}
/**
 * Runs the clustering (classification) step with the given arguments: logs the paths involved,
 * writes a {@code KMeansClusteringPolicy} to {@code clustersIn}, and invokes
 * {@code ClusterClassificationDriver.run}.
 *
 * @param conf
 *          configuration forwarded unchanged to the classification driver
 * @param input
 *          the directory pathname for input points
 * @param clustersIn
 *          the directory pathname for input clusters; the clustering policy is written here
 * @param output
 *          the directory pathname for output points; the clustered points are directed to its
 *          {@code PathDirectory.CLUSTERED_POINTS_DIRECTORY} child
 * @param clusterClassificationThreshold
 *          clustering strictness / outlier removal parameter, expected to lie between 0 and 1;
 *          vectors whose pdf is below this value will not be clustered
 * @param runSequential
 *          if true execute sequential algorithm
 * @throws IOException
 *           propagated from the calls made here
 * @throws InterruptedException
 *           propagated from the calls made here
 * @throws ClassNotFoundException
 *           propagated from the calls made here
 */
public static void clusterData(Configuration conf,
                               Path input,
                               Path clustersIn,
                               Path output,
                               double clusterClassificationThreshold,
                               boolean runSequential)
    throws IOException, InterruptedException, ClassNotFoundException {
  if (log.isInfoEnabled()) {
    log.info("Running Clustering");
    log.info("Input: {} Clusters In: {} Out: {}", input, clustersIn, output);
  }

  ClusterClassifier.writePolicy(new KMeansClusteringPolicy(), clustersIn);

  // Built only after the policy has been written, so the order of side effects is unchanged.
  Path clusteredPointsDir = new Path(output, PathDirectory.CLUSTERED_POINTS_DIRECTORY);
  // Sixth driver argument; the name is an assumption - confirm against the driver's signature.
  boolean emitMostLikely = true;

  ClusterClassificationDriver.run(
      conf,
      input,
      output,
      clusteredPointsDir,
      clusterClassificationThreshold,
      emitMostLikely,
      runSequential);
}
/**
 * Runs the clustering (classification) step with the given arguments: logs the paths involved,
 * writes a {@code KMeansClusteringPolicy} to {@code clustersIn}, and invokes
 * {@code ClusterClassificationDriver.run}.
 *
 * @param conf
 *          configuration forwarded unchanged to the classification driver
 * @param input
 *          the directory pathname for input points
 * @param clustersIn
 *          the directory pathname for input clusters; the clustering policy is written here
 * @param output
 *          the directory pathname for output points; the clustered points are directed to its
 *          {@code PathDirectory.CLUSTERED_POINTS_DIRECTORY} child
 * @param clusterClassificationThreshold
 *          clustering strictness / outlier removal parameter, expected to lie between 0 and 1;
 *          vectors whose pdf is below this value will not be clustered
 * @param runSequential
 *          if true execute sequential algorithm
 * @throws IOException
 *           propagated from the calls made here
 * @throws InterruptedException
 *           propagated from the calls made here
 * @throws ClassNotFoundException
 *           propagated from the calls made here
 */
public static void clusterData(Configuration conf,
                               Path input,
                               Path clustersIn,
                               Path output,
                               double clusterClassificationThreshold,
                               boolean runSequential)
    throws IOException, InterruptedException, ClassNotFoundException {
  if (log.isInfoEnabled()) {
    log.info("Running Clustering");
    log.info("Input: {} Clusters In: {} Out: {}", input, clustersIn, output);
  }

  ClusterClassifier.writePolicy(new KMeansClusteringPolicy(), clustersIn);

  // Built only after the policy has been written, so the order of side effects is unchanged.
  Path clusteredPointsDir = new Path(output, PathDirectory.CLUSTERED_POINTS_DIRECTORY);
  // Sixth driver argument; the name is an assumption - confirm against the driver's signature.
  boolean emitMostLikely = true;

  ClusterClassificationDriver.run(
      conf,
      input,
      output,
      clusteredPointsDir,
      clusterClassificationThreshold,
      emitMostLikely,
      runSequential);
}
// Invoke the classification driver on `input`. The fourth argument is the
// PathDirectory.CLUSTERED_POINTS_DIRECTORY child of `output`; `threshold`, `emitMostLikely`
// and `runSequential` are forwarded unchanged from the enclosing scope.
// NOTE(review): `output` is also passed as the third argument - presumably the location the
// driver reads clusters from; confirm against the driver's signature.
ClusterClassificationDriver.run(conf, input, output, new Path(output, PathDirectory.CLUSTERED_POINTS_DIRECTORY), threshold, emitMostLikely, runSequential);
/**
 * Invokes {@code ClusterClassificationDriver.run} on {@code pointsPath} with a fixed threshold
 * of 0.73, using {@code clusteringOutputPath} and {@code classifiedOutputPath} as the third and
 * fourth arguments.
 *
 * @param runSequential
 *          forwarded unchanged as the last argument of the driver call
 * @throws IOException
 *           propagated from the driver call
 * @throws InterruptedException
 *           propagated from the driver call
 * @throws ClassNotFoundException
 *           propagated from the driver call
 */
private void runClassificationWithOutlierRemoval(boolean runSequential)
    throws IOException, InterruptedException, ClassNotFoundException {
  // Fifth driver argument; presumably the outlier-removal threshold, as the method name suggests.
  final double outlierThreshold = 0.73;
  // Sixth driver argument; the name is an assumption - confirm against the driver's signature.
  final boolean emitMostLikely = true;

  ClusterClassificationDriver.run(
      getConfiguration(),
      pointsPath,
      clusteringOutputPath,
      classifiedOutputPath,
      outlierThreshold,
      emitMostLikely,
      runSequential);
}
// Invoke the classification driver on `input`. The fourth argument is the
// PathDirectory.CLUSTERED_POINTS_DIRECTORY child of `output`; `threshold`, `emitMostLikely`
// and `runSequential` are forwarded unchanged from the enclosing scope.
// NOTE(review): the identical call appears twice in a row here - confirm both invocations are
// intended (they may belong to different enclosing methods not visible in this view).
ClusterClassificationDriver.run(conf, input, output, new Path(output, PathDirectory.CLUSTERED_POINTS_DIRECTORY), threshold, emitMostLikely, runSequential);
ClusterClassificationDriver.run(conf, input, output, new Path(output, PathDirectory.CLUSTERED_POINTS_DIRECTORY), threshold, emitMostLikely, runSequential);
/**
 * Invokes {@code ClusterClassificationDriver.run} on {@code pointsPath} with a threshold of 0.0,
 * using {@code clusteringOutputPath} and {@code classifiedOutputPath} as the third and fourth
 * arguments. The last two arguments are both fixed to {@code true}.
 *
 * @throws IOException
 *           propagated from the driver call
 * @throws InterruptedException
 *           propagated from the driver call
 * @throws ClassNotFoundException
 *           propagated from the driver call
 */
private void runClassificationWithoutOutlierRemoval()
    throws IOException, InterruptedException, ClassNotFoundException {
  // Fifth driver argument; presumably a zero threshold means nothing is rejected - confirm.
  final double noOutlierThreshold = 0.0;
  // Sixth and seventh driver arguments; names are assumptions - confirm against the driver.
  final boolean emitMostLikely = true;
  final boolean sequential = true;

  ClusterClassificationDriver.run(
      getConfiguration(),
      pointsPath,
      clusteringOutputPath,
      classifiedOutputPath,
      noOutlierThreshold,
      emitMostLikely,
      sequential);
}
// Delegate to run(...) using the configuration returned by getConf(). `input`, `clustersIn`,
// `output`, `clusterClassificationThreshold` and `runSequential` are forwarded unchanged; the
// sixth argument is hard-coded to true.
// NOTE(review): the identical call appears three times in a row here - confirm all invocations
// are intended (they may belong to different enclosing methods not visible in this view).
run(getConf(), input, clustersIn, output, clusterClassificationThreshold, true, runSequential);
run(getConf(), input, clustersIn, output, clusterClassificationThreshold, true, runSequential);
run(getConf(), input, clustersIn, output, clusterClassificationThreshold, true, runSequential);