/**
 * Creates an evaluator bound to a single cluster.
 *
 * @param argumentFields    fields handed to {@code createCompareFunctions} to build the compare functions
 * @param cluster           cluster this evaluator works against; its points are stored as well
 * @param comparisonMeasure comparison measure stored on this evaluator
 * @param compareFunction   compare function handed to {@code createCompareFunctions}
 */
public ClusterEvaluator( Fields argumentFields, Cluster cluster, ComparisonMeasure comparisonMeasure, CompareFunction compareFunction ) {
  this.comparisonMeasure = comparisonMeasure;
  this.cluster = cluster;

  // compare functions are created before the points are read, same as the original ordering
  this.compareFunctions = createCompareFunctions( argumentFields, compareFunction );
  this.points = cluster.getPoints();
}
/**
 * Creates a spec for the given schema and populates it with the given clusters.
 *
 * @param schemaParam model schema passed to the super constructor
 * @param clusters    clusters handed to {@link #setClusters(List)}
 */
public ClusteringSpec( ModelSchema schemaParam, List<Cluster> clusters ) {
  super( schemaParam );

  this.setClusters( clusters );
}
/**
 * Returns the target category reported by {@code cluster}.
 *
 * @return the result of {@code cluster.getTargetCategory()}
 */
public String getTargetCategory() {
  final String targetCategory = cluster.getTargetCategory();

  return targetCategory;
}
}
// build the spec for the model schema, then choose how values are compared and how distance is measured
ClusteringSpec clusteringSpec = new ClusteringSpec( modelSchema );
clusteringSpec.setDefaultCompareFunction( new AbsoluteDifferenceCompareFunction() );
clusteringSpec.setComparisonMeasure( new SquaredEuclideanMeasure() );

// register three clusters; each is constructed from a string label followed by four double values
clusteringSpec.addCluster(
  new Cluster( "1", 5.006d, 3.428d, 1.462d, 0.246d ) );
clusteringSpec.addCluster(
  new Cluster( "2", 5.9296875d, 2.7578125d, 4.4109375d, 1.4390625d ) );
clusteringSpec.addCluster(
  new Cluster( "3", 6.85277777777778d, 3.075d, 5.78611111111111d, 2.09722222222222d ) );

// wrap the finished spec in the function
ClusteringFunction clusteringFunction = new ClusteringFunction( clusteringSpec );
/**
 * Appends a cluster to this spec and assigns it the next ordinal.
 *
 * @param cluster cluster to add; its points size must equal the number of expected field names
 *                of the model schema
 * @throws IllegalArgumentException if the cluster's points size differs from the number of
 *                                  expected field names
 */
public void addCluster( Cluster cluster ) {
  boolean sizesMatch = getModelSchema().getExpectedFieldNames().size() == cluster.getPointsSize();

  if( !sizesMatch ) {
    throw new IllegalArgumentException( "given points must be same size as active fields" );
  }

  // ordinals are one-based: the new cluster takes the current count plus one
  int nextOrdinal = getClusters().size() + 1;
  cluster.setOrdinal( nextOrdinal );

  this.clusters.add( cluster );
}
/**
 * Prepares the operation context: installs a fresh {@code EvaluatorContext} as payload and fills in
 * the cluster evaluators for the argument fields and a result buffer with one slot per cluster.
 *
 * @param flowProcess   forwarded to the super implementation
 * @param operationCall call whose context receives the payload
 */
@Override
public void prepare( FlowProcess flowProcess, OperationCall<Context<EvaluatorContext>> operationCall ) {
  super.prepare( flowProcess, operationCall );

  Context<EvaluatorContext> context = operationCall.getContext();
  EvaluatorContext evaluatorContext = new EvaluatorContext();

  // publish the payload first, then populate it, mirroring the original statement order
  context.payload = evaluatorContext;

  evaluatorContext.evaluators = getSpec().getClusterEvaluator( operationCall.getArgumentFields() );
  evaluatorContext.results = new double[ getSpec().getClusters().size() ];
}
public ClusterEvaluator[] getClusterEvaluator( Fields argumentFields ) { List<Cluster> sorted = new ArrayList<Cluster>( clusters ); final DataField predictedField = getModelSchema().getPredictedField( getModelSchema().getPredictedFieldNames().get( 0 ) ); // order tables in category order as this is the declared field name order if( predictedField instanceof CategoricalDataField ) { Ordering<Cluster> ordering = Ordering.natural().onResultOf( new Function<Cluster, Comparable>() { @Override public Comparable apply( Cluster cluster ) { return ( (CategoricalDataField) predictedField ).getCategories().indexOf( cluster.getTargetCategory() ); } } ); Collections.sort( sorted, ordering ); } ClusterEvaluator[] clusterEvaluators = new ClusterEvaluator[ sorted.size() ]; for( int i = 0; i < sorted.size(); i++ ) clusterEvaluators[ i ] = new ClusterEvaluator( argumentFields, sorted.get( i ), getComparisonMeasure(), getDefaultCompareFunction() ); return clusterEvaluators; }
@Override public void operate( FlowProcess flowProcess, FunctionCall<Context<EvaluatorContext>> functionCall ) { ClusterEvaluator[] evaluators = functionCall.getContext().payload.evaluators; double[] results = functionCall.getContext().payload.results; for( int i = 0; i < evaluators.length; i++ ) results[ i ] = evaluators[ i ].evaluate( functionCall.getArguments() ); LOG.debug( "results: {}", results ); // calc min distance double min = Doubles.min( results ); int index = Doubles.indexOf( results, min ); String category = evaluators[ index ].getTargetCategory(); LOG.debug( "category: {}", category ); // emit distance, and intermediate cluster category scores if( !getSpec().getModelSchema().isIncludePredictedCategories() ) { functionCall.getOutputCollector().add( functionCall.getContext().result( category ) ); return; } Tuple result = functionCall.getContext().tuple; result.set( 0, category ); for( int i = 0; i < results.length; i++ ) result.set( i + 1, results[ i ] ); functionCall.getOutputCollector().add( result ); } }
/**
 * Replaces the current clusters with the given ones, adding each through
 * {@code addCluster} in iteration order.
 *
 * @param clusters clusters to install; may be a list backed by this spec's own cluster collection
 * @throws NullPointerException if {@code clusters} is null (the current clusters are left untouched)
 */
public void setClusters( List<Cluster> clusters ) {
  // snapshot before clearing: if the argument is backed by this.clusters, clear() would empty it
  // before it is iterated and every cluster would be silently lost
  Cluster[] replacements = clusters.toArray( new Cluster[ clusters.size() ] );

  this.clusters.clear();

  for( Cluster cluster : replacements ) {
    addCluster( cluster );
  }
}
// Returns the index of the given cluster's target category within the categories of predictedField,
// which is cast to CategoricalDataField here.
// NOTE(review): the enclosing expression starts outside this fragment — confirm how the result is used.
@Override public Comparable apply( Cluster cluster ) { return ( (CategoricalDataField) predictedField ).getCategories().indexOf( cluster.getTargetCategory() ); } } );