/**
 * Builds the translation step, resolving the given variable names against the DAG.
 *
 * @param dag the DAG whose {@code Variables} container is used to resolve names.
 * @param latentNames names of the latent variables.
 * @param latentInterfaceNames names of the latent interface variables.
 * @param noLatentVariablesName names of the remaining variables.
 */
public ParallelVBTranslate(DAG dag, List<String> latentNames, List<String> latentInterfaceNames, List<String> noLatentVariablesName) {
    latentVariables = resolveVariables(dag, latentNames);
    latentInterfaceVariables = resolveVariables(dag, latentInterfaceNames);
    // NOTE(review): the parameter is called "noLatentVariablesName" but it is stored as
    // "allVariables" — looks inconsistent; confirm which semantics the callers expect.
    allVariables = resolveVariables(dag, noLatentVariablesName);
}

/** Maps each name to the corresponding {@code Variable} of the DAG. */
private static List<Variable> resolveVariables(DAG dag, List<String> names) {
    return names.stream()
            .map(dag.getVariables()::getVariableByName)
            .collect(Collectors.toList());
}
/**
 * Runs the time-0 update: converts the dynamic data to its static view, updates the
 * time-0 model with it, and stores the posterior assignments of the latent variables.
 *
 * @param data the dynamic data for the initial time slice.
 */
private void updateTime0(DataFlink<DynamicDataInstance> data) {
    // Time-0 learning operates on the static (non-temporal) view of the data.
    DataFlink<DataInstance> staticData = DataFlinkConverter.convertToStatic(data);
    this.parallelVBTime0.updateModel(staticData);

    // Resolve the latent variable names against the time-0 DAG.
    List<Variable> latentVars = this.latentVariablesNames.stream()
            .map(this.dagTime0.getVariables()::getVariableByName)
            .collect(Collectors.toList());

    this.dataPosteriorDataSet = this.parallelVBTime0.computePosteriorAssignment(staticData, latentVars);
}
/**
 * Flink rich-function initialization hook: deserializes the SVB engine and the two
 * latent-variable name lists from the broadcast {@code Configuration}, then resolves
 * the names to {@code Variable} objects of the SVB's DAG.
 *
 * @param parameters the Flink configuration carrying the serialized state.
 * @throws Exception if deserialization or the superclass initialization fails.
 */
@Override
public void open(Configuration parameters) throws Exception {
    super.open(parameters);
    // The SVB engine is shipped to each task serialized inside the configuration.
    svb = Serialization.deserializeObject(parameters.getBytes(eu.amidst.flinklink.core.learning.parametric.ParallelVB.SVB, null));
    svb.initLearning();
    List<String> variableNames = Serialization.deserializeObject(parameters.getBytes(LATENT_VARIABLE_NAMES, null));
    List<String> interfaceVariablenames = Serialization.deserializeObject(parameters.getBytes(LATENT_INTERFACE_VARIABLE_NAMES, null));
    // Names are resolved locally because Variable objects are tied to this task's DAG instance.
    latentVariables = variableNames.stream().map(name -> svb.getDAG().getVariables().getVariableByName(name)).collect(Collectors.toList());
    latentInterfaceVariables = interfaceVariablenames.stream().map(name -> svb.getDAG().getVariables().getVariableByName(name)).collect(Collectors.toList());
}
}
/**
 * Returns the learnt posterior distribution of the variable named {@code varName},
 * taken from whichever learning engine (local or Flink) is currently configured.
 *
 * @param varName the name of the variable to query.
 * @param <E> the expected distribution subtype.
 * @return the distribution of the variable, or {@code null} if no engine is set.
 */
public <E extends UnivariateDistribution> E getPosteriorDistribution(String varName) {
    if (learningAlgorithm != null) {
        // Local (non-distributed) learning engine.
        return (E) this.learningAlgorithm.getLearntBayesianNetwork()
                .getConditionalDistribution(dag.getVariables().getVariableByName(varName));
    }
    if (learningAlgorithmFlink != null) {
        // Distributed Flink-based learning engine.
        return (E) this.learningAlgorithmFlink.getLearntBayesianNetwork()
                .getConditionalDistribution(dag.getVariables().getVariableByName(varName));
    }
    return null;
}
public static void main(String[] args) throws Exception { // load the true Bayesian network BayesianNetwork originalBnet = BayesianNetworkLoader.loadFromFile(args[0]); System.out.println("\n Network \n " + args[0]); System.out.println("\n Number of variables \n " + originalBnet.getDAG().getVariables().getNumberOfVars()); //Sampling from the input BN BayesianNetworkSampler sampler = new BayesianNetworkSampler(originalBnet); sampler.setSeed(0); // Defines the size of the data to be generated from the input BN int sizeData = Integer.parseInt(args[1]); System.out.println("\n Sampling and saving the data... \n "); DataStream<DataInstance> data = sampler.sampleToDataStream(sizeData); DataStreamWriter.writeDataToFile(data, "./data.arff"); }
/**
 * Loads {@code ./data.arff} through Flink, builds the hidden-variable naive Bayes
 * DAG over it, prints a summary, and saves the resulting network to
 * {@code ./BNHiddenExample.bn}.
 *
 * @param args unused.
 * @throws Exception if loading, model construction or saving fails.
 */
public static void main(String[] args) throws Exception {

    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // Load the data and build the hidden naive Bayes structure over its attributes.
    DataFlink<DataInstance> dataFlink = DataFlinkLoader.loadDataFromFile(env, "./data.arff", false);
    DAG hiddenNBDag = SetBNwithHidden.getHiddenNaiveBayesStructure(dataFlink);
    BayesianNetwork network = new BayesianNetwork(hiddenNBDag);

    System.out.println("\n Number of variables \n " + network.getDAG().getVariables().getNumberOfVars());
    System.out.println(hiddenNBDag.toString());

    BayesianNetworkWriter.save(network, "./BNHiddenExample.bn");
}
/**
 * Returns the Bayesian network at Time 0.
 *
 * Each dynamic variable's time-0 distribution is deep-copied and re-pointed at the
 * corresponding static variables of the time-0 DAG.
 *
 * @return a {@link BayesianNetwork} object.
 */
public BayesianNetwork toBayesianNetworkTime0() {
    DAG staticDag = this.getDynamicDAG().toDAGTime0();
    BayesianNetwork bn = new BayesianNetwork(staticDag);
    for (Variable dynVar : this.getDynamicVariables()) {
        Variable staticVar = staticDag.getVariables().getVariableByName(dynVar.getName());
        // Deep copy so the dynamic network's distribution object is left untouched.
        ConditionalDistribution dist = Serialization.deepCopy(this.getConditionalDistributionTime0(dynVar));
        dist.setVar(staticVar);
        // Re-map every parent onto its static counterpart by name.
        List<Variable> staticParents = dist.getConditioningVariables().stream()
                .map(parent -> staticDag.getVariables().getVariableByName(parent.getName()))
                .collect(Collectors.toList());
        dist.setConditioningVariables(staticParents);
        bn.setConditionalDistribution(staticVar, dist);
    }
    return bn;
}
/**
 * Returns the Bayesian network at Time T.
 *
 * Each dynamic variable's time-T distribution is deep-copied and re-pointed at the
 * corresponding static variables of the time-T DAG.
 *
 * @return a {@link BayesianNetwork} object.
 */
public BayesianNetwork toBayesianNetworkTimeT() {
    DAG staticDag = this.getDynamicDAG().toDAGTimeT();
    BayesianNetwork bn = new BayesianNetwork(staticDag);
    for (Variable dynVar : this.getDynamicVariables()) {
        Variable staticVar = staticDag.getVariables().getVariableByName(dynVar.getName());
        // Deep copy so the dynamic network's distribution object is left untouched.
        ConditionalDistribution dist = Serialization.deepCopy(this.getConditionalDistributionTimeT(dynVar));
        dist.setVar(staticVar);
        // Re-map every parent onto its static counterpart by name.
        List<Variable> staticParents = dist.getConditioningVariables().stream()
                .map(parent -> staticDag.getVariables().getVariableByName(parent.getName()))
                .collect(Collectors.toList());
        dist.setConditioningVariables(staticParents);
        bn.setConditionalDistribution(staticVar, dist);
    }
    return bn;
}
/** * This method contains the code needed to build the NaiveBayes DAG with a global hidden variable modelling * concept drift. * @return A poperly created {@link DAG} object. * @throws Exception if an error occurs while reading the file. */ public static DAG modelBuilding() throws Exception { //We load the data for one month DataStream<DataInstance> instances = DataStreamLoader.open("./datasets/bnaic2015/BCC/Month0.arff"); //Define the variables. By default, a random variable is created for each attribute Variables variables = new Variables(instances.getAttributes()); //We create a new global hidden Gaussian variable Variable hiddenGaussian = variables.newGaussianVariable("HiddenGaussian"); //We get the variable Default Variable defaultVariable = variables.getVariableByName("default"); //We define the DAG DAG dag = new DAG(variables); //We add the links of the DAG dag.getVariables() .getListOfVariables() .stream() .filter(var -> var != defaultVariable) .filter(var -> var != hiddenGaussian) .forEach(var -> { dag.getParentSet(var).addParent(defaultVariable); dag.getParentSet(var).addParent(hiddenGaussian); }); return dag; }
/** * This method constains the code needed to learn the model and produce the output. * @param parallelSVB a parallel SVB (Streaming Variational Bayes) object * @return a Bayesian Network * @throws IOException if a file reading error occurs */ public static BayesianNetwork learnModel(ParallelSVB parallelSVB) throws IOException { //We access the hidden var Variable hiddenGaussian = parallelSVB.getSVBEngine().getDAG().getVariables().getVariableByName("HiddenGaussian"); //For each month of the period for (int i = 0; i < MONTHS; i++) { //We load the data for the given month DataStream<DataInstance> monthlyData = DataStreamLoader.open("./datasets/bnaic2015/BCC/Month" + i + ".arff"); //We update the model in parallel with the data from data month parallelSVB.updateModel(monthlyData); //We query the given var Normal normal = parallelSVB.getSVBEngine().getParameterPosterior(hiddenGaussian); //We print the mean of this Gaussian var System.out.println("E(H) at month "+i+":\t" + normal.getMean()); } //Finally we get the learnt Bayesian network and return it. return parallelSVB.getLearntBayesianNetwork(); }
// Print a short summary: the number of variables in the network and the DAG structure.
System.out.println("\n Number of variables \n " + bn.getDAG().getVariables().getNumberOfVars());
System.out.println(dag.toString());
/**
 * For every record in the batch, installs the posterior of each unobserved latent
 * interface variable into the plateau structure (as the belief carried over through
 * the interface variable), then recomputes posterior assignments for the whole batch
 * and emits them.
 *
 * @param data the incoming batch of posterior assignments.
 * @param out collector receiving the recomputed posterior assignments.
 */
@Override
public void flatMap(Batch<DataPosteriorAssignment> data, Collector<DataPosteriorAssignment> out) {
    for (int i = 0; i < data.getElements().size(); i++) {
        // Hoisted out of the variable loop: the record is the same for every latent variable.
        DataPosteriorAssignment dataPosteriorAssignment = data.getElements().get(i);
        for (Variable latentVariable : latentInterfaceVariables) {
            if (!dataPosteriorAssignment.isObserved(latentVariable)) {
                UnivariateDistribution dist = dataPosteriorAssignment.getPosterior().getPosterior(latentVariable);
                // Interface variables follow the <name><INTERFACE_SUFFIX> naming convention.
                Variable interfaceVariable = this.svb.getDAG().getVariables()
                        .getVariableByName(latentVariable.getName() + DynamicVariables.INTERFACE_SUFFIX);
                // Deep-copy the posterior onto the interface variable and clear any stale assignment.
                this.svb.getPlateuStructure().getNodeOfVar(latentVariable, i)
                        .setPDist(dist.toEFUnivariateDistribution().deepCopy(interfaceVariable));
                this.svb.getPlateuStructure().getNodeOfVar(latentVariable, i).setAssignment(null);
            }
        }
    }
    // Rebuild an in-memory batch from the assignments so the SVB engine can process it.
    DataOnMemory<DataInstance> dataBatch = new DataOnMemoryListContainer<DataInstance>(
            attributes,
            data.getElements().stream()
                    .map(d -> new DataInstanceFromAssignment(d.getPosterior().getId(), d.getAssignment(), attributes, variables))
                    .collect(Collectors.toList())
    );
    List<DataPosteriorAssignment> posteriorAssignments = svb.computePosteriorAssignment(dataBatch, latentVariables);
    for (DataPosteriorAssignment posterior : posteriorAssignments) {
        out.collect(posterior);
    }
}
/**
 * Returns the posterior distribution at time T for the given variable. Parameter
 * variables are resolved through the plateau's EF learning network; ordinary
 * variables are resolved through the time-T DAG.
 *
 * @param parameter the variable to query.
 * @param <E> the expected distribution subtype.
 * @return the posterior distribution of the variable.
 */
public <E extends UnivariateDistribution> E getParameterPosteriorTimeT(Variable parameter) {
    Variable resolved;
    if (parameter.isParameterVariable()) {
        // Parameter variables live in the EF learning network of the plateau.
        resolved = this.svbTimeT.getPlateuStructure().getEFLearningBN()
                .getParametersVariables().getVariableByName(parameter.getName());
    } else {
        resolved = this.dagTimeT.getVariables().getVariableByName(parameter.getName());
    }
    return this.svbTimeT.getParameterPosterior(resolved);
}
// Resolve the two temperature sensors of slice i (variables are suffixed with "_" + i).
Variable sensorT1 = fireDetectorModel.getVariables().getVariableByName("SensorTemp1_"+i);
Variable sensorT2 = fireDetectorModel.getVariables().getVariableByName("SensorTemp2_"+i);
// Fix the sensors' intercepts to 0, and sensor 2's slope to 1.
svb.addParameterConstraint(new Constraint("alpha", sensorT1, 0.0));
svb.addParameterConstraint(new Constraint("alpha", sensorT2, 0.0));
svb.addParameterConstraint(new Constraint("beta1", sensorT2, 1.0));
Variable temp = fireDetectorModel.getVariables().getVariableByName("Temperature_"+i);
// When there is no fire (Fire_i = 0), pin temperature's intercept to 0 and slope to 1.
svb.addParameterConstraint(new Constraint("alpha | {Fire_"+i+" = 0}", temp, 0.0));
svb.addParameterConstraint(new Constraint("beta1 | {Fire_"+i+" = 0}", temp, 1.0));
// Resolve the two temperature sensor variables and fix their intercepts to 0.
Variable sensorT1 = fireDetectorModel.getVariables().getVariableByName("SensorTemp1");
Variable sensorT2 = fireDetectorModel.getVariables().getVariableByName("SensorTemp2");
svb.addParameterConstraint(new Constraint("alpha", sensorT1, 0.0));
svb.addParameterConstraint(new Constraint("alpha", sensorT2, 0.0));
// Record the learnt mean temperature for this month (Normal component at parent state 0).
Normal_MultinomialParents dist = model.getConditionalDistribution(fireDetectorModel.getVariables().getVariableByName("Temperature"));
tempsMonth[i] = dist.getNormal(0).getMean();
/**
 * Returns the posterior distribution at time 0 for the given variable. Parameter
 * variables are resolved through the plateau's EF learning network; ordinary
 * variables are resolved through the time-0 DAG.
 *
 * @param parameter the variable to query.
 * @param <E> the expected distribution subtype.
 * @return the posterior distribution of the variable.
 */
public <E extends UnivariateDistribution> E getParameterPosteriorTime0(Variable parameter) {
    Variable resolved;
    if (parameter.isParameterVariable()) {
        // Parameter variables live in the EF learning network of the plateau.
        resolved = this.parallelVBTime0.getSVB().getPlateuStructure().getEFLearningBN()
                .getParametersVariables().getVariableByName(parameter.getName());
    } else {
        resolved = this.dagTime0.getVariables().getVariableByName(parameter.getName());
    }
    return this.parallelVBTime0.getParameterPosterior(resolved);
}
// Resolve each received name to the corresponding Variable of this task's SVB DAG.
latentInterfaceVariables = names.stream().map(name -> svb.getDAG().getVariables().getVariableByName(name)).collect(Collectors.toList());
/**
 * This method contains the code to set up the plateau model.
 * @param dag, the DAG to be replicated
 * @return A properly initialized {@link ParallelSVB} object.
 */
public static ParallelSVB plateuModelSetUp(DAG dag){

    //We access the hidden var
    Variable hiddenGaussian = dag.getVariables().getVariableByName("HiddenGaussian");

    //We create the ParallelSVB object which will perform the learning
    ParallelSVB parallelSVB = new ParallelSVB();

    //Set the DAG
    parallelSVB.setDAG(dag);

    //We tell how the above DAG should be expanded.
    parallelSVB.getSVBEngine().setPlateuStructure(new PlateuIIDReplication(Arrays.asList(hiddenGaussian)));

    //We also tell how to evolve the hidden variable over time
    GaussianHiddenTransitionMethod gaussianHiddenTransitionMethod = new GaussianHiddenTransitionMethod(Arrays.asList(hiddenGaussian), 0, 0.1);
    parallelSVB.getSVBEngine().setTransitionMethod(gaussianHiddenTransitionMethod);

    //We set the window/batch size used for learning
    parallelSVB.getSVBEngine().setWindowsSize(100);

    //We set the maximum number of iterations of the VMP method
    parallelSVB.getSVBEngine().getPlateuStructure().getVMP().setMaxIter(100);

    //We set the threshold defining the convergence of the VMP method
    parallelSVB.getSVBEngine().getPlateuStructure().getVMP().setThreshold(0.001);

    //We do not allow for debugging info.
    parallelSVB.setOutput(false);

    //We invoke the setup of the underlying data structures
    parallelSVB.initLearning();

    return parallelSVB;
}
public static void baseTest(ExecutionEnvironment env, DataStream<DataInstance> data, BayesianNetwork network, int batchSize, double error) throws IOException, ClassNotFoundException { DataStreamWriter.writeDataToFile(data, "./datasets/simulated/tmp.arff"); DataFlink<DataInstance> dataFlink = DataFlinkLoader.loadDataFromFile(env, "./datasets/simulated/tmp.arff", false); network.getDAG().getVariables().setAttributes(dataFlink.getAttributes()); //Structure learning is excluded from the test, i.e., we use directly the initial Asia network structure // and just learn then test the parameter learning //Parameter Learning dVMP parallelVB = new dVMP(); parallelVB.setOutput(true); parallelVB.setMaximumGlobalIterations(10); parallelVB.setSeed(5); parallelVB.setBatchSize(batchSize); parallelVB.setLocalThreshold(0.001); parallelVB.setGlobalThreshold(0.01); parallelVB.setMaximumLocalIterations(100); parallelVB.setMaximumGlobalIterations(100); parallelVB.setDAG(network.getDAG()); parallelVB.initLearning(); parallelVB.updateModel(dataFlink); BayesianNetwork bnet = parallelVB.getLearntBayesianNetwork(); //Check if the probability distributions of each node for (Variable var : network.getVariables()) { if (Main.VERBOSE) System.out.println("\n------ Variable " + var.getName() + " ------"); if (Main.VERBOSE) System.out.println("\nTrue distribution:\n" + network.getConditionalDistribution(var)); if (Main.VERBOSE) System.out.println("\nLearned distribution:\n" + bnet.getConditionalDistribution(var)); Assert.assertTrue(bnet.getConditionalDistribution(var).equalDist(network.getConditionalDistribution(var), error)); } //Or check directly if the true and learned networks are equals Assert.assertTrue(bnet.equalBNs(network, error)); }
// NOTE(review): this is the tail of a larger Flink dataflow chain; its head is outside this view.
// Map step: per-partition VB update over the time-T DAG's variables.
.map(new DynamicParallelVB.ParallelVBMap(data.getAttributes(), this.dagTimeT.getVariables().getListOfVariables(),randomStart, idenitifableModelling))
.withParameters(config)
// Broadcast the VB parameters (keyed by the time-T DAG's name) to every task.
.withBroadcastSet(loop, "VB_PARAMS_" + this.dagTimeT.getName())
// FlatMap step: inference/assignment over the same variable list.
.flatMap(new CajaMarLearnMapInferenceAssignment(data.getAttributes(), this.dagTimeT.getVariables().getListOfVariables()))
.withParameters(config);