/** * Generate a DataSpark with the given number of samples and attributes (discrete and continuous). * @param jsc, JavaSparkContext * @param seed, the seed of the random number generator. * @param nSamples, the number of samples of the data stream. * @param nDiscreteAtts, the number of discrete attributes. * @param nContinuousAttributes, the number of continuous attributes. * @return A valid {@code DataSpark} object. */ public static DataSpark generate(JavaSparkContext jsc, int seed, int nSamples, int nDiscreteAtts, int nContinuousAttributes){ BayesianNetworkGenerator.setSeed(seed); BayesianNetworkGenerator.setNumberOfGaussianVars(nContinuousAttributes); BayesianNetworkGenerator.setNumberOfMultinomialVars(nDiscreteAtts,2); int nTotal = nDiscreteAtts+nContinuousAttributes; int nLinksMin = nTotal-1; int nLinksMax = nTotal*(nTotal-1)/2; BayesianNetworkGenerator.setNumberOfLinks((int)(0.8*nLinksMin + 0.2*nLinksMax)); BayesianNetwork bn = BayesianNetworkGenerator.generateBayesianNetwork(); // Sample from the BN int parallelism = 4; BayesianNetworkSampler sampler = new BayesianNetworkSampler(bn); return sampler.sampleToDataSpark(jsc, nSamples, parallelism); }
/**
 * Runs {@code ParallelKMeans} on the data stored at {@code pathToFile} and prints one line per
 * resulting centroid to standard output.
 *
 * <p>When {@code isSampleData()} is true, a Bayesian network is generated first, a data stream
 * is sampled from it, and that stream is written to {@code pathToFile} before it is loaded.
 *
 * @throws IOException declared by this method; presumably raised by the file write
 *         (confirm against {@code DataStreamWriter}).
 */
public static void runParallelKMeans() throws IOException {
    if (isSampleData()) {
        // Produce a synthetic data set and persist it so the load below picks it up.
        BayesianNetworkGenerator.setNumberOfGaussianVars(getNumGaussVars());
        BayesianNetworkGenerator.setNumberOfMultinomialVars(getNumDiscVars(), getNumStates());
        BayesianNetworkSampler sampler =
                new BayesianNetworkSampler(BayesianNetworkGenerator.generateBayesianNetwork());
        DataStream<DataInstance> sampled = sampler.sampleToDataStream(getSampleSize());
        DataStreamWriter.writeDataToFile(sampled, pathToFile);
    }

    // The data is always read back from the file, whether or not it was just written.
    DataStream<DataInstance> data = DataStreamLoader.open(pathToFile);

    ParallelKMeans.setBatchSize(batchSize);
    double[][] centroids = ParallelKMeans.learnKMeans(getK(), data);

    // Clusters are reported with 1-based labels.
    int label = 1;
    for (double[] centroid : centroids) {
        System.out.println("Cluster " + label + ": " + Arrays.toString(centroid));
        label++;
    }
}
// Generate a Bayesian network (generator settings are configured outside this fragment),
// then sample a data stream from it; nbrSamples is presumably the number of instances drawn.
BayesianNetwork bn = BayesianNetworkGenerator.generateBayesianNetwork(); DataStream<DataInstance> data = new BayesianNetworkSampler(bn).sampleToDataStream(nbrSamples);
/**
 * Builds a random Bayesian network with the requested attribute counts and samples a
 * {@code DataFlink} from it.
 *
 * <p>The number of links requested from the generator is
 * {@code (int)(0.8 * (n - 1) + 0.2 * (n * (n - 1) / 2))}, where {@code n} is the total number
 * of attributes and {@code n * (n - 1) / 2} is evaluated in {@code int} arithmetic.
 *
 * @param env the Flink execution environment handed to the sampler.
 * @param seed the seed passed to both {@code BayesianNetworkGenerator} and the sampler.
 * @param nSamples the number of samples requested from the sampler.
 * @param nDiscreteAtts the number of multinomial variables (presumably 2 states each).
 * @param nContinuousAttributes the number of Gaussian variables.
 * @return the {@code DataFlink} produced by the sampler.
 */
public static DataFlink<DataInstance> generate(ExecutionEnvironment env, int seed, int nSamples, int nDiscreteAtts, int nContinuousAttributes) {
    // Blend of the two link-count bounds; the cast truncates toward zero.
    final int attCount = nDiscreteAtts + nContinuousAttributes;
    final int targetLinks = (int) (0.8 * (attCount - 1) + 0.2 * (attCount * (attCount - 1) / 2));

    // Configure the static generator.
    BayesianNetworkGenerator.setSeed(seed);
    BayesianNetworkGenerator.setNumberOfGaussianVars(nContinuousAttributes);
    BayesianNetworkGenerator.setNumberOfMultinomialVars(nDiscreteAtts, 2);
    BayesianNetworkGenerator.setNumberOfLinks(targetLinks);

    // Build the network and sample from it with the same seed.
    final BayesianNetworkSampler networkSampler =
            new BayesianNetworkSampler(BayesianNetworkGenerator.generateBayesianNetwork());
    networkSampler.setSeed(seed);
    return networkSampler.sampleToDataFlink(env, nSamples);
}
// Configure the generator with nCVars Gaussian variables and nMVars multinomial variables
// (second argument 2 is presumably the number of states per variable; confirm), then
// generate the network kept as originalBnet.
BayesianNetworkGenerator.setNumberOfGaussianVars(nCVars); BayesianNetworkGenerator.setNumberOfMultinomialVars(nMVars, 2); BayesianNetwork originalBnet = BayesianNetworkGenerator.generateBayesianNetwork();
// Set the generator's link count to 1, then generate the network; the remaining generator
// settings are whatever was configured outside this fragment.
BayesianNetworkGenerator.setNumberOfLinks(1); BayesianNetwork bn = BayesianNetworkGenerator.generateBayesianNetwork();