public static void main(String[] args) throws IOException { DataStream<DataInstance> data = eu.amidst.core.utils.DataSetGenerator.generate(0,1000,0,10); DataStreamWriter.writeDataToFile(data, "./datasets/artificialDataset.arff"); // DataStream<DynamicDataInstance> dataDynamic = eu.amidst.dynamic.utils.DataSetGenerator.generate(0,100, 100,0,10); // DataStreamWriter.writeDataToFile(dataDynamic, "./datasets/artificialDatasetDynamic.arff"); } }
public static void main(String[] args) throws Exception { //We can open the data stream using the static class DataStreamLoader DataStream<DataInstance> data = DataStreamLoader.open("datasets/simulated/syntheticData.arff"); //We can save this data set to a new file using the static class DataStreamWriter DataStreamWriter.writeDataToFile(data, "datasets/simulated/tmp.arff"); } }
public static void main(String[] args) throws Exception {
    String path = "datasets/simulated/";
    DataStream<DynamicDataInstance> sample;

    // 2 discrete + 3 continuous attributes, 1000 samples.
    int nOfDisc = 2;
    int nOfCont = 3;
    sample = DataSetGenerator.generate(1, 1000, nOfDisc, nOfCont);
    DataStreamWriter.writeDataToFile(sample, path + "exampleDS_d" + nOfDisc + "_c" + nOfCont + ".arff");

    // 0 discrete + 5 continuous attributes: one large (10000) and one small (50) sample.
    nOfDisc = 0;
    nOfCont = 5;
    sample = DataSetGenerator.generate(1, 10000, nOfDisc, nOfCont);
    DataStreamWriter.writeDataToFile(sample, path + "exampleDS_d" + nOfDisc + "_c" + nOfCont + ".arff");
    sample = DataSetGenerator.generate(1, 50, nOfDisc, nOfCont);
    DataStreamWriter.writeDataToFile(sample, path + "exampleDS_d" + nOfDisc + "_c" + nOfCont + "_small.arff");

    // 5 discrete + 0 continuous attributes, 1000 samples.
    nOfDisc = 5;
    nOfCont = 0;
    sample = DataSetGenerator.generate(1, 1000, nOfDisc, nOfCont);
    DataStreamWriter.writeDataToFile(sample, path + "exampleDS_d" + nOfDisc + "_c" + nOfCont + ".arff");
}
public static void main(String[] args) throws Exception { //We can open the data stream using the static class DataStreamLoader DataStream<DataInstance> data = DataStreamLoader.open("datasets/simulated/syntheticData.arff"); //ReservoirSampling allows to create a DataOnMemory object containing a unfiorm subsample of the data stream DataOnMemory<DataInstance> dataOnMemory = ReservoirSampling.samplingNumberOfSamples(100, data); //We can save this data set to a new file using the static class DataStreamWriter DataStreamWriter.writeDataToFile(data, "datasets/simulated/subsample.arff"); } }
// Fragment (enclosing method/lambda not visible from here): keeps instances whose map entry
// for this id equals the given year, collects them — presumably into `list`, which must be
// assigned upstream (TODO confirm) — then writes them to ./datasets/abstract_<year>.arff
// and prints the year together with the number of matching instances.
return map.get(id).compareTo(year)==0; }).collect(Collectors.toList()); DataStreamWriter.writeDataToFile(new DataOnMemoryListContainer<DataInstance>(dataInstances.getAttributes(),list),"./datasets/abstract_"+year+".arff"); System.out.println(year+" "+list.size());
public static void shuflle(String[] args) throws IOException { //Utils.shuffleData("/Users/andresmasegosa/Dropbox/Amidst/datasets/uci-text/docword.nips.arff", "/Users/andresmasegosa/Dropbox/Amidst/datasets/uci-text/docword.nips.shuffled.arff"); DataStream<DataInstance> dataInstances = DataStreamLoader.open("/Users/andresmasegosa/Dropbox/Amidst/datasets/uci-text/docword.nips.arff"); List<DataOnMemory<DataInstance>> batches = BatchSpliteratorByID.streamOverDocuments(dataInstances, 1).collect(Collectors.toList()); Collections.shuffle(batches); DataOnMemoryListContainer<DataInstance> newData = new DataOnMemoryListContainer<DataInstance>(dataInstances.getAttributes()); for (DataOnMemory<DataInstance> batch : batches) { for (DataInstance dataInstance : batch) { newData.add(dataInstance); } } DataStreamWriter.writeDataToFile(newData,"/Users/andresmasegosa/Dropbox/Amidst/datasets/uci-text/docword.nips.shuffled.arff"); }
public static void main(String[] args) throws Exception { // load the true Bayesian network BayesianNetwork originalBnet = BayesianNetworkLoader.loadFromFile(args[0]); System.out.println("\n Network \n " + args[0]); System.out.println("\n Number of variables \n " + originalBnet.getDAG().getVariables().getNumberOfVars()); //Sampling from the input BN BayesianNetworkSampler sampler = new BayesianNetworkSampler(originalBnet); sampler.setSeed(0); // Defines the size of the data to be generated from the input BN int sizeData = Integer.parseInt(args[1]); System.out.println("\n Sampling and saving the data... \n "); DataStream<DataInstance> data = sampler.sampleToDataStream(sizeData); DataStreamWriter.writeDataToFile(data, "./data.arff"); }
public static void main(String[] agrs) throws IOException, ClassNotFoundException { //We first load the WasteIncinerator bayesian network which has multinomial and Gaussian variables. BayesianNetwork bn = BayesianNetworkLoader.loadFromFile("./networks/simulated/WasteIncinerator.bn"); //We simply create an BayesianNetworkSampler object, passing to the constructor the BN model. BayesianNetworkSampler sampler = new BayesianNetworkSampler(bn); sampler.setSeed(0); //The method sampleToDataStream returns a DataStream with ten DataInstance objects. DataStream<DataInstance> dataStream = sampler.sampleToDataStream(10); //We finally save the sampled data set to a arff file. DataStreamWriter.writeDataToFile(dataStream, "datasets/simulated/sample-WasteIncinerator.arff"); } }
public static void shuffleAbstracts(String[] args) throws IOException {
    // Base directory of the abstract files; can be overridden via args[0].
    String path = "/Users/andresmasegosa/Dropbox/amidst_postdoc/abstractByYear/";
    if (args.length > 0) {
        path = args[0];
    }

    // Load the merged data set and shuffle it at document granularity.
    DataOnMemory<DataInstance> container =
            DataStreamLoader.loadDataOnMemoryFromFile(path + "abstracts.all.arff");
    List<DataOnMemory<DataInstance>> batches =
            BatchSpliteratorByID.streamOverDocuments(container, 1).collect(Collectors.toList());
    Collections.shuffle(batches);

    // Rebuild an in-memory container holding the documents in shuffled order.
    DataOnMemoryListContainer<DataInstance> newData =
            new DataOnMemoryListContainer<DataInstance>(container.getAttributes());
    for (DataOnMemory<DataInstance> batch : batches) {
        newData.addAll(batch.getList());
    }

    // Persist the shuffled data set.
    DataStreamWriter.writeDataToFile(newData, path + "abstracts.all.shuffle.arff");
}
public static void main(String[] args) throws Exception {
    Stopwatch watch = Stopwatch.createStarted();

    // Configure the generator: 3 discrete binary variables, no continuous ones.
    DynamicBayesianNetworkGenerator dbnGenerator = new DynamicBayesianNetworkGenerator();
    dbnGenerator.setNumberOfContinuousVars(0);
    dbnGenerator.setNumberOfDiscreteVars(3);
    dbnGenerator.setNumberOfStates(2);

    // Dynamic naive Bayes with a 2-state class and temporal links in the children.
    DynamicBayesianNetwork network =
            DynamicBayesianNetworkGenerator.generateDynamicNaiveBayes(new Random(0), 2, true);

    // Sampler with a fixed seed for reproducibility.
    DynamicBayesianNetworkSampler sampler = new DynamicBayesianNetworkSampler(network);
    sampler.setSeed(0);

    // Sample 3 sequences of 2 time steps each and write them out as ARFF.
    DataStream<DynamicDataInstance> dataStream = sampler.sampleToDataBase(3, 2);
    DataStreamWriter.writeDataToFile(dataStream, "./datasets/simulated/dnb-samples.arff");
    System.out.println(watch.stop());

    // Draw a second sample and print it assignment by assignment.
    for (DynamicAssignment assignment : sampler.sampleToDataBase(3, 2)) {
        System.out.println("\n Sequence ID" + assignment.getSequenceID());
        System.out.println("\n Time ID" + assignment.getTimeID());
        System.out.println(assignment.outputString());
    }
} }
public static void main(String[] args) throws Exception{ int nContinuousAttributes=0; int nDiscreteAttributes=5; String names[] = {"SEQUENCE_ID", "TIME_ID","DEFAULT","Income","Expenses","Balance","TotalCredit"}; String path = "datasets/simulated/"; int nSamples=1000; String filename="bank_data_test"; int seed = filename.hashCode(); //Generate random dynamic data DataStream<DynamicDataInstance> data = DataSetGenerator.generate(seed,nSamples,nDiscreteAttributes,nContinuousAttributes); List<Attribute> list = new ArrayList<Attribute>(); //Replace the names IntStream.range(0, data.getAttributes().getNumberOfAttributes()) .forEach(i -> { Attribute a = data.getAttributes().getFullListOfAttributes().get(i); StateSpaceType s = a.getStateSpaceType(); Attribute a2 = new Attribute(a.getIndex(), names[i],s); list.add(a2); }); //New list of attributes Attributes att2 = new Attributes(list); List<DynamicDataInstance> listData = data.stream().collect(Collectors.toList()); //Datastream with the new attribute names DataStream<DynamicDataInstance> data2 = new DataOnMemoryListContainer<DynamicDataInstance>(att2,listData); //Write to a single file DataStreamWriter.writeDataToFile(data2, path+filename+".arff"); }
public static void main(String[] args) throws Exception{ int nContinuousAttributes=4; int nDiscreteAttributes=1; String names[] = {"SEQUENCE_ID", "TIME_ID","Default","Income","Expenses","Balance","TotalCredit"}; String path = "datasets/simulated/"; int nSamples=1000; int seed = 11234; String filename="bank_data_test"; //Generate random dynamic data DataStream<DynamicDataInstance> data = DataSetGenerator.generate(seed,nSamples,nDiscreteAttributes,nContinuousAttributes); List<Attribute> list = new ArrayList<Attribute>(); //Replace the names IntStream.range(0, data.getAttributes().getNumberOfAttributes()) .forEach(i -> { Attribute a = data.getAttributes().getFullListOfAttributes().get(i); StateSpaceType s = a.getStateSpaceType(); Attribute a2 = new Attribute(a.getIndex(), names[i],s); list.add(a2); }); //New list of attributes Attributes att2 = new Attributes(list); List<DynamicDataInstance> listData = data.stream().collect(Collectors.toList()); //Datastream with the new attribute names DataStream<DynamicDataInstance> data2 = new DataOnMemoryListContainer<DynamicDataInstance>(att2,listData); //Write to a single file DataStreamWriter.writeDataToFile(data2, path+filename+".arff"); }
private static void sampleBayesianNetwork() throws IOException { BayesianNetwork bn = new BayesianNetwork(dag); BayesianNetworkSampler sampler = new BayesianNetworkSampler(bn); sampler.setSeed(0); //The method sampleToDataStream returns a DataStream with ten DataInstance objects. DataStream<DataInstance> dataStream = sampler.sampleToDataStream(getSampleSize()); //We finally save the sampled data set to an arff file. DataStreamWriter.writeDataToFile(dataStream, "datasets/sampleBatchSize.arff"); }
public static void generateData() throws Exception {
    DynamicBayesianNetwork network = createDynamicFireDetectorModel();

    // Sampler with a fixed seed; Temperature and Smoke are marked latent so they are
    // hidden in the generated data.
    DynamicBayesianNetworkSampler sampler = new DynamicBayesianNetworkSampler(network);
    sampler.setSeed(1);
    sampler.setLatentVar(network.getDynamicVariables().getVariableByName("Temperature"));
    sampler.setLatentVar(network.getDynamicVariables().getVariableByName("Smoke"));

    // Training data: 100 sequences of 1000 time steps.
    DataStream<DynamicDataInstance> dataStream = sampler.sampleToDataBase(100, 1000);
    DataStreamWriter.writeDataToFile(dataStream, "./datasets/TimeIndexedSensorReadings.arff");

    // Evidence data: a single short sequence of 10 time steps.
    dataStream = sampler.sampleToDataBase(1, 10);
    DataStreamWriter.writeDataToFile(dataStream, "./datasets/TimeIndexedSensorReadingsEvidence.arff");
}

public static void generateEvidenceData() throws Exception {
private static void sampleBayesianNetwork() throws IOException { BayesianNetwork bn = new BayesianNetwork(dag); BayesianNetworkSampler sampler = new BayesianNetworkSampler(bn); sampler.setSeed(0); //The method sampleToDataStream returns a DataStream with ten DataInstance objects. DataStream<DataInstance> dataStream = sampler.sampleToDataStream(getSampleSize()); //We finally save the sampled data set to an arff file. DataStreamWriter.writeDataToFile(dataStream, getPathToFile()); }
public static void main(String[] args) throws Exception{ //We first generate a DBN with 3 continuous and 3 discrete variables with 2 states DynamicBayesianNetworkGenerator dbnGenerator = new DynamicBayesianNetworkGenerator(); dbnGenerator.setNumberOfContinuousVars(3); dbnGenerator.setNumberOfDiscreteVars(3); dbnGenerator.setNumberOfStates(2); //Create a NB-like structure with temporal links in the children (leaves) and 2 states for //the class variable DynamicBayesianNetwork network = DynamicBayesianNetworkGenerator.generateDynamicNaiveBayes( new Random(0), 2, true); //Create the sampler from this network DynamicBayesianNetworkSampler sampler = new DynamicBayesianNetworkSampler(network); sampler.setSeed(0); //Sample a dataStream of 3 sequences of 1000 samples each DataStream<DynamicDataInstance> dataStream = sampler.sampleToDataBase(3,1000); //Save the created data sample in a file DataStreamWriter.writeDataToFile(dataStream, "./datasets/simulated/dnb-samples.arff"); } }
// Fragment (enclosing method not visible): persists the prepared stream `data2`
// to <path>/cajamar.arff.
DataStreamWriter.writeDataToFile(data2, path+"cajamar.arff");
public static void runParallelKMeans() throws IOException {
    DataStream<DataInstance> data;

    // Optionally synthesize a data set from a random Bayesian network and persist it first.
    if (isSampleData()) {
        BayesianNetworkGenerator.setNumberOfGaussianVars(getNumGaussVars());
        BayesianNetworkGenerator.setNumberOfMultinomialVars(getNumDiscVars(), getNumStates());
        BayesianNetwork bn = BayesianNetworkGenerator.generateBayesianNetwork();
        data = new BayesianNetworkSampler(bn).sampleToDataStream(getSampleSize());
        DataStreamWriter.writeDataToFile(data, pathToFile);
    }

    // Always (re)load from disk so both branches take the same code path into k-means.
    data = DataStreamLoader.open(pathToFile);

    // Run parallel k-means and print each resulting centroid.
    ParallelKMeans.setBatchSize(batchSize);
    double[][] centroids = ParallelKMeans.learnKMeans(getK(), data);
    for (int clusterID = 0; clusterID < centroids.length; clusterID++) {
        System.out.println("Cluster " + (clusterID + 1) + ": " + Arrays.toString(centroids[clusterID]));
    }
}
// Fragment (enclosing method not visible): writes the sampled stream to ./data.arff.
DataStreamWriter.writeDataToFile(dataStream, "./data.arff");
// Fragment (enclosing method not visible): samples `sampleSize` instances from `bn`
// and persists them to `dataFile`.
// NOTE(review): unlike the other samplers in this file, no explicit seed is set here,
// so the output is presumably non-deterministic — confirm whether that is intended.
BayesianNetworkSampler sampler = new BayesianNetworkSampler(bn); DataStream<DataInstance> dataStream = sampler.sampleToDataStream(sampleSize); DataStreamWriter.writeDataToFile(dataStream, dataFile);