InstanceList instances = new InstanceList(random, 100, 2).subList(0,10); System.err.println(instances.size() + " instances"); Clustering clustering = generateClustering(instances); System.err.println("clustering=" + clustering); NeighborIterator iter = new ClusterSampleIterator(clustering, random, 0.5, 10); while (iter.hasNext()) { Instance instance = (Instance)iter.next(); System.err.println(instance.getData() + "\n"); iter = new PairSampleIterator(clustering, random, 0.5, 10); while (iter.hasNext()) { Instance instance = (Instance)iter.next(); System.err.println(instance.getData() + "\n"); iter = new AllPairsIterator(clustering); while (iter.hasNext()) { Instance instance = (Instance)iter.next(); System.err.println(instance.getData() + "\n");
/**
 * Creates a sampler over the given clustering.
 *
 * @param clustering True clustering.
 * @param random Source of randomness.
 * @param positiveProportion Proportion of Instances that should be positive examples.
 * @param numberSamples Total number of samples to generate.
 */
public PairSampleIterator (Clustering clustering,
                           Randoms random,
                           double positiveProportion,
                           int numberSamples) {
    super(clustering);

    this.instances = clustering.getInstances();
    this.random = random;
    this.numberSamples = numberSamples;
    this.positiveProportion = positiveProportion;

    // The cast truncates, so the positive quota is rounded toward zero.
    this.positiveTarget = (int)(numberSamples * positiveProportion);

    // No samples have been produced yet.
    this.positiveCount = 0;
    this.totalCount = 0;

    // Kept as the final step, after every field above has been assigned.
    // NOTE(review): presumably fills nonsingletonClusters, which next() reads -- confirm.
    setNonSingletons();
}
/**
 * Scores a clustering by averaging a per-pair agreement value over every
 * neighbor produced by an {@code AllPairsIterator} run on a singleton
 * clustering of the same instances.
 *
 * <p>For each neighbor the evaluator's score is used as-is when the first
 * members of its two old clusters carry the same label in {@code clustering},
 * and as {@code 1.0 - score} otherwise.
 *
 * @param clustering the clustering to score
 * @return the mean per-pair value; NaN if the iterator yields no pairs
 *         (the sum {@code 0.0} is divided by a count of zero)
 */
public double score(Clustering clustering) {
    Clustering singletons =
        ClusterUtils.createSingletonClustering(clustering.getInstances());

    double sum = 0;
    int numPairs = 0;

    AllPairsIterator pairs = new AllPairsIterator(singletons);
    while (pairs.hasNext()) {
        Instance pairInstance = (Instance) pairs.next();
        AgglomerativeNeighbor pair = (AgglomerativeNeighbor) pairInstance.getData();

        double mergeScore = evaluator.evaluate(pair);
        int[][] oldClusters = pair.getOldClusters();

        // Compare the labels of the first member of each old cluster.
        boolean sameLabel =
            clustering.getLabel(oldClusters[0][0]) == clustering.getLabel(oldClusters[1][0]);

        sum += sameLabel ? mergeScore : 1.0 - mergeScore;
        numPairs++;
    }

    return sum / numPairs;
}
PairSampleIterator iterator = new PairSampleIterator(training .get(i), random, 0.5, training.get(i).getNumInstances()); while(iterator.hasNext()) { Instance inst = iterator.next(); trainingInstances.add(pipe.pipe(inst));
/**
 * Samples the next neighbor and wraps it in an {@link Instance} whose data
 * is the {@code AgglomerativeNeighbor}; target, name and source are null.
 *
 * <p>A positive example (two pieces split from one non-singleton cluster) is
 * produced while the positive quota is unmet, or whenever the clustering has
 * exactly one cluster, provided at least one non-singleton cluster exists.
 * Otherwise a negative example is produced from one sampled member of each
 * of two distinct, randomly chosen clusters.
 *
 * @return a new Instance holding the sampled neighbor
 * @throws IllegalStateException if a negative example is required but the
 *         clustering has fewer than two clusters, so no pair of distinct
 *         clusters can be drawn
 */
public Instance next () {
    AgglomerativeNeighbor neighbor = null;
    int numClusters = clustering.getNumClusters();

    if ((positiveCount < positiveTarget || numClusters == 1)
            && nonsingletonClusters.length > 0) {
        positiveCount++;
        int label = nonsingletonClusters[random.nextInt(nonsingletonClusters.length)];
        // Named "members" so it does not shadow the instances field.
        int[] members = clustering.getIndicesWithLabel(label);
        int[][] clusters = sampleSplitFromArray(members, random, 2);
        // Both pieces already share a cluster, so the clustering is passed unchanged.
        neighbor = new AgglomerativeNeighbor(clustering, clustering, clusters);
    } else {
        // Two distinct labels are needed. With a single cluster the rejection
        // loop below could never exit, so fail fast instead of hanging.
        if (numClusters < 2) {
            throw new IllegalStateException(
                "Cannot sample a negative pair: clustering has " + numClusters
                + " cluster(s) and no non-singleton cluster to split.");
        }

        int labeli = random.nextInt(numClusters);
        int labelj = random.nextInt(numClusters);
        while (labeli == labelj) {
            labelj = random.nextInt(numClusters);
        }

        // Draw order (member of labeli, then member of labelj) matches the
        // original argument evaluation order, so seeded runs are unchanged.
        Clustering merged = ClusterUtils.copyAndMergeClusters(clustering, labeli, labelj);
        int[] fromI = sampleFromArray(clustering.getIndicesWithLabel(labeli), random, 1);
        int[] fromJ = sampleFromArray(clustering.getIndicesWithLabel(labelj), random, 1);
        neighbor = new AgglomerativeNeighbor(clustering, merged, fromI, fromJ);
    }

    totalCount++;
    return new Instance(neighbor, null, null, null);
}
int[] subcluster = sampleFromArray(instances, random, 2); int[] cluster1 = new int[]{subcluster[random.nextInt(subcluster.length)]}; // Singleton. int[] cluster2 = new int[subcluster.length - 1]; labelj = random.nextInt(clustering.getNumClusters()); int[] ii = sampleFromArray(clustering.getIndicesWithLabel(labeli), random, 1); int[] ij = sampleFromArray(clustering.getIndicesWithLabel(labelj), random, 1);
// Builds a training InstanceList by feeding a ClusterSampleIterator over `training`
// through clusterPipe, then trains a classifier on it with MaxEntTrainer.
// Progress messages are written to stderr.
// NOTE(review): the sampler arguments 0.5 and 100 are presumably the positive-example
// proportion and the total number of samples -- confirm against ClusterSampleIterator.
System.err.println("Training with " + training); InstanceList trainList = new InstanceList(clusterPipe); trainList.addThruPipe(new ClusterSampleIterator(training, random, 0.5, 100)); System.err.println("Created " + trainList.size() + " instances."); Classifier me = new MaxEntTrainer().train(trainList);
InstanceList instances = new InstanceList(random, 100, 2).subList(0,10); System.err.println(instances.size() + " instances"); Clustering clustering = generateClustering(instances); System.err.println("clustering=" + clustering); NeighborIterator iter = new ClusterSampleIterator(clustering, random, 0.5, 10); while (iter.hasNext()) { Instance instance = (Instance)iter.next(); System.err.println(instance.getData() + "\n"); iter = new PairSampleIterator(clustering, random, 0.5, 10); while (iter.hasNext()) { Instance instance = (Instance)iter.next(); System.err.println(instance.getData() + "\n"); iter = new AllPairsIterator(clustering); while (iter.hasNext()) { Instance instance = (Instance)iter.next(); System.err.println(instance.getData() + "\n");
/**
 * Scores a clustering by averaging a per-pair agreement value over every
 * neighbor produced by an {@code AllPairsIterator} run on a singleton
 * clustering of the same instances.
 *
 * <p>For each neighbor the evaluator's score is used as-is when the first
 * members of its two old clusters carry the same label in {@code clustering},
 * and as {@code 1.0 - score} otherwise.
 *
 * @param clustering the clustering to score
 * @return the mean per-pair value; NaN if the iterator yields no pairs
 *         (the sum {@code 0.0} is divided by a count of zero)
 */
public double score(Clustering clustering) {
    Clustering singletons =
        ClusterUtils.createSingletonClustering(clustering.getInstances());

    double sum = 0;
    int numPairs = 0;

    AllPairsIterator pairs = new AllPairsIterator(singletons);
    while (pairs.hasNext()) {
        Instance pairInstance = (Instance) pairs.next();
        AgglomerativeNeighbor pair = (AgglomerativeNeighbor) pairInstance.getData();

        double mergeScore = evaluator.evaluate(pair);
        int[][] oldClusters = pair.getOldClusters();

        // Compare the labels of the first member of each old cluster.
        boolean sameLabel =
            clustering.getLabel(oldClusters[0][0]) == clustering.getLabel(oldClusters[1][0]);

        sum += sameLabel ? mergeScore : 1.0 - mergeScore;
        numPairs++;
    }

    return sum / numPairs;
}
PairSampleIterator iterator = new PairSampleIterator(training .get(i), random, 0.5, training.get(i).getNumInstances()); while(iterator.hasNext()) { Instance inst = iterator.next(); trainingInstances.add(pipe.pipe(inst));
/**
 * Samples the next neighbor and wraps it in an {@link Instance} whose data
 * is the {@code AgglomerativeNeighbor}; target, name and source are null.
 *
 * <p>A positive example (two pieces split from one non-singleton cluster) is
 * produced while the positive quota is unmet, or whenever the clustering has
 * exactly one cluster, provided at least one non-singleton cluster exists.
 * Otherwise a negative example is produced from one sampled member of each
 * of two distinct, randomly chosen clusters.
 *
 * @return a new Instance holding the sampled neighbor
 * @throws IllegalStateException if a negative example is required but the
 *         clustering has fewer than two clusters, so no pair of distinct
 *         clusters can be drawn
 */
public Instance next () {
    AgglomerativeNeighbor neighbor = null;
    int numClusters = clustering.getNumClusters();

    if ((positiveCount < positiveTarget || numClusters == 1)
            && nonsingletonClusters.length > 0) {
        positiveCount++;
        int label = nonsingletonClusters[random.nextInt(nonsingletonClusters.length)];
        // Named "members" so it does not shadow the instances field.
        int[] members = clustering.getIndicesWithLabel(label);
        int[][] clusters = sampleSplitFromArray(members, random, 2);
        // Both pieces already share a cluster, so the clustering is passed unchanged.
        neighbor = new AgglomerativeNeighbor(clustering, clustering, clusters);
    } else {
        // Two distinct labels are needed. With a single cluster the rejection
        // loop below could never exit, so fail fast instead of hanging.
        if (numClusters < 2) {
            throw new IllegalStateException(
                "Cannot sample a negative pair: clustering has " + numClusters
                + " cluster(s) and no non-singleton cluster to split.");
        }

        int labeli = random.nextInt(numClusters);
        int labelj = random.nextInt(numClusters);
        while (labeli == labelj) {
            labelj = random.nextInt(numClusters);
        }

        // Draw order (member of labeli, then member of labelj) matches the
        // original argument evaluation order, so seeded runs are unchanged.
        Clustering merged = ClusterUtils.copyAndMergeClusters(clustering, labeli, labelj);
        int[] fromI = sampleFromArray(clustering.getIndicesWithLabel(labeli), random, 1);
        int[] fromJ = sampleFromArray(clustering.getIndicesWithLabel(labelj), random, 1);
        neighbor = new AgglomerativeNeighbor(clustering, merged, fromI, fromJ);
    }

    totalCount++;
    return new Instance(neighbor, null, null, null);
}
/**
 * Creates a sampler over the given clustering.
 *
 * @param clustering True clustering.
 * @param random Source of randomness.
 * @param positiveProportion Proportion of Instances that should be positive examples.
 * @param numberSamples Total number of samples to generate.
 */
public PairSampleIterator (Clustering clustering,
                           Randoms random,
                           double positiveProportion,
                           int numberSamples) {
    super(clustering);

    this.instances = clustering.getInstances();
    this.random = random;
    this.numberSamples = numberSamples;
    this.positiveProportion = positiveProportion;

    // The cast truncates, so the positive quota is rounded toward zero.
    this.positiveTarget = (int)(numberSamples * positiveProportion);

    // No samples have been produced yet.
    this.positiveCount = 0;
    this.totalCount = 0;

    // Kept as the final step, after every field above has been assigned.
    // NOTE(review): presumably fills nonsingletonClusters, which next() reads -- confirm.
    setNonSingletons();
}
int[] subcluster = sampleFromArray(instances, random, 2); int[] cluster1 = new int[]{subcluster[random.nextInt(subcluster.length)]}; // Singleton. int[] cluster2 = new int[subcluster.length - 1]; labelj = random.nextInt(clustering.getNumClusters()); int[] ii = sampleFromArray(clustering.getIndicesWithLabel(labeli), random, 1); int[] ij = sampleFromArray(clustering.getIndicesWithLabel(labelj), random, 1);
// Builds a training InstanceList by feeding a ClusterSampleIterator over `training`
// through clusterPipe, then trains a classifier on it with MaxEntTrainer.
// Progress messages are written to stderr.
// NOTE(review): the sampler arguments 0.5 and 100 are presumably the positive-example
// proportion and the total number of samples -- confirm against ClusterSampleIterator.
System.err.println("Training with " + training); InstanceList trainList = new InstanceList(clusterPipe); trainList.addThruPipe(new ClusterSampleIterator(training, random, 0.5, 100)); System.err.println("Created " + trainList.size() + " instances."); Classifier me = new MaxEntTrainer().train(trainList);
InstanceList instances = new InstanceList(random, 100, 2).subList(0,10); System.err.println(instances.size() + " instances"); Clustering clustering = generateClustering(instances); System.err.println("clustering=" + clustering); NeighborIterator iter = new ClusterSampleIterator(clustering, random, 0.5, 10); while (iter.hasNext()) { Instance instance = (Instance)iter.next(); System.err.println(instance.getData() + "\n"); iter = new PairSampleIterator(clustering, random, 0.5, 10); while (iter.hasNext()) { Instance instance = (Instance)iter.next(); System.err.println(instance.getData() + "\n"); iter = new AllPairsIterator(clustering); while (iter.hasNext()) { Instance instance = (Instance)iter.next(); System.err.println(instance.getData() + "\n");
/**
 * Scores a clustering by averaging a per-pair agreement value over every
 * neighbor produced by an {@code AllPairsIterator} run on a singleton
 * clustering of the same instances.
 *
 * <p>For each neighbor the evaluator's score is used as-is when the first
 * members of its two old clusters carry the same label in {@code clustering},
 * and as {@code 1.0 - score} otherwise.
 *
 * @param clustering the clustering to score
 * @return the mean per-pair value; NaN if the iterator yields no pairs
 *         (the sum {@code 0.0} is divided by a count of zero)
 */
public double score(Clustering clustering) {
    Clustering singletons =
        ClusterUtils.createSingletonClustering(clustering.getInstances());

    double sum = 0;
    int numPairs = 0;

    AllPairsIterator pairs = new AllPairsIterator(singletons);
    while (pairs.hasNext()) {
        Instance pairInstance = (Instance) pairs.next();
        AgglomerativeNeighbor pair = (AgglomerativeNeighbor) pairInstance.getData();

        double mergeScore = evaluator.evaluate(pair);
        int[][] oldClusters = pair.getOldClusters();

        // Compare the labels of the first member of each old cluster.
        boolean sameLabel =
            clustering.getLabel(oldClusters[0][0]) == clustering.getLabel(oldClusters[1][0]);

        sum += sameLabel ? mergeScore : 1.0 - mergeScore;
        numPairs++;
    }

    return sum / numPairs;
}
/**
 * Samples the next neighbor and wraps it in an {@link Instance} whose data
 * is the {@code AgglomerativeNeighbor}; target, name and source are null.
 *
 * <p>A positive example (two pieces split from one non-singleton cluster) is
 * produced while the positive quota is unmet, or whenever the clustering has
 * exactly one cluster, provided at least one non-singleton cluster exists.
 * Otherwise a negative example is produced from one sampled member of each
 * of two distinct, randomly chosen clusters.
 *
 * @return a new Instance holding the sampled neighbor
 * @throws IllegalStateException if a negative example is required but the
 *         clustering has fewer than two clusters, so no pair of distinct
 *         clusters can be drawn
 */
public Instance next () {
    AgglomerativeNeighbor neighbor = null;
    int numClusters = clustering.getNumClusters();

    if ((positiveCount < positiveTarget || numClusters == 1)
            && nonsingletonClusters.length > 0) {
        positiveCount++;
        int label = nonsingletonClusters[random.nextInt(nonsingletonClusters.length)];
        // Named "members" so it does not shadow the instances field.
        int[] members = clustering.getIndicesWithLabel(label);
        int[][] clusters = sampleSplitFromArray(members, random, 2);
        // Both pieces already share a cluster, so the clustering is passed unchanged.
        neighbor = new AgglomerativeNeighbor(clustering, clustering, clusters);
    } else {
        // Two distinct labels are needed. With a single cluster the rejection
        // loop below could never exit, so fail fast instead of hanging.
        if (numClusters < 2) {
            throw new IllegalStateException(
                "Cannot sample a negative pair: clustering has " + numClusters
                + " cluster(s) and no non-singleton cluster to split.");
        }

        int labeli = random.nextInt(numClusters);
        int labelj = random.nextInt(numClusters);
        while (labeli == labelj) {
            labelj = random.nextInt(numClusters);
        }

        // Draw order (member of labeli, then member of labelj) matches the
        // original argument evaluation order, so seeded runs are unchanged.
        Clustering merged = ClusterUtils.copyAndMergeClusters(clustering, labeli, labelj);
        int[] fromI = sampleFromArray(clustering.getIndicesWithLabel(labeli), random, 1);
        int[] fromJ = sampleFromArray(clustering.getIndicesWithLabel(labelj), random, 1);
        neighbor = new AgglomerativeNeighbor(clustering, merged, fromI, fromJ);
    }

    totalCount++;
    return new Instance(neighbor, null, null, null);
}
/**
 * Creates a sampler over the given clustering.
 *
 * @param clustering True clustering.
 * @param random Source of randomness.
 * @param positiveProportion Proportion of Instances that should be positive examples.
 * @param numberSamples Total number of samples to generate.
 */
public PairSampleIterator (Clustering clustering,
                           Randoms random,
                           double positiveProportion,
                           int numberSamples) {
    super(clustering);

    this.instances = clustering.getInstances();
    this.random = random;
    this.numberSamples = numberSamples;
    this.positiveProportion = positiveProportion;

    // The cast truncates, so the positive quota is rounded toward zero.
    this.positiveTarget = (int)(numberSamples * positiveProportion);

    // No samples have been produced yet.
    this.positiveCount = 0;
    this.totalCount = 0;

    // Kept as the final step, after every field above has been assigned.
    // NOTE(review): presumably fills nonsingletonClusters, which next() reads -- confirm.
    setNonSingletons();
}
int[] subcluster = sampleFromArray(instances, random, 2); int[] cluster1 = new int[]{subcluster[random.nextInt(subcluster.length)]}; // Singleton. int[] cluster2 = new int[subcluster.length - 1]; labelj = random.nextInt(clustering.getNumClusters()); int[] ii = sampleFromArray(clustering.getIndicesWithLabel(labeli), random, 1); int[] ij = sampleFromArray(clustering.getIndicesWithLabel(labelj), random, 1);
// Builds a training InstanceList by feeding a ClusterSampleIterator over `training`
// through clusterPipe, then trains a classifier on it with MaxEntTrainer.
// Progress messages are written to stderr.
// NOTE(review): the sampler arguments 0.5 and 100 are presumably the positive-example
// proportion and the total number of samples -- confirm against ClusterSampleIterator.
System.err.println("Training with " + training); InstanceList trainList = new InstanceList(clusterPipe); trainList.addThruPipe(new ClusterSampleIterator(training, random, 0.5, 100)); System.err.println("Created " + trainList.size() + " instances."); Classifier me = new MaxEntTrainer().train(trainList);