/**
 * Creates a function for the given regression spec after validating it.
 * <p>
 * The spec must declare a normalization, its first predicted field must be a
 * {@link CategoricalDataField}, and that field must declare exactly as many
 * categories as the spec has regression tables.
 *
 * @param regressionSpec the regression spec this function evaluates
 * @throws IllegalArgumentException if any of the above conditions is not met
 */
public CategoricalRegressionFunction( GeneralRegressionSpec regressionSpec ) {
  super( regressionSpec );

  if( regressionSpec.getNormalization() == null ) {
    throw new IllegalArgumentException( "normalization may not be null" );
  }

  // only the first predicted field name is consulted
  ModelSchema schema = regressionSpec.getModelSchema();
  String targetName = schema.getPredictedFieldNames().get( 0 );
  DataField targetField = schema.getPredictedField( targetName );

  boolean isCategorical = targetField instanceof CategoricalDataField;

  if( !isCategorical ) {
    throw new IllegalArgumentException( "predicted field must be categorical" );
  }

  int categoryCount = ( (CategoricalDataField) targetField ).getCategories().size();
  int tableCount = regressionSpec.getRegressionTables().size();

  if( categoryCount != tableCount ) {
    throw new IllegalArgumentException( "predicted field categories must be same size as the number of regression tables" );
  }
}
public ExpressionEvaluator[] getRegressionTableEvaluators( Fields argumentFields ) { List<RegressionTable> tables = new ArrayList<RegressionTable>( regressionTables ); final DataField predictedField = getModelSchema().getPredictedField( getModelSchema().getPredictedFieldNames().get( 0 ) ); // order tables in category order as this is the declared field name order if( predictedField instanceof CategoricalDataField ) { Ordering<RegressionTable> ordering = Ordering.natural().onResultOf( new Function<RegressionTable, Comparable>() { private List<String> categories = ( (CategoricalDataField) predictedField ).getCategories(); @Override public Comparable apply( RegressionTable regressionTable ) { return categories.indexOf( regressionTable.getTargetCategory() ); } } ); Collections.sort( tables, ordering ); } ExpressionEvaluator[] evaluators = new ExpressionEvaluator[ tables.size() ]; for( int i = 0; i < tables.size(); i++ ) evaluators[ i ] = tables.get( i ).bind( argumentFields ); return evaluators; }
/**
 * Scores the current argument tuple and emits the winning category.
 * <p>
 * Each bound expression is evaluated against the arguments, the spec's link
 * function is applied to every raw value, and the values are then passed
 * through the spec's normalization. The target category of the expression
 * holding the greatest normalized value (the first one on ties) is selected.
 * <p>
 * If the model schema does not include predicted categories, only the
 * selected category is emitted. Otherwise the context tuple is emitted with
 * the category at position 0 followed by every normalized value.
 *
 * @param flowProcess  the current flow process (not used here)
 * @param functionCall supplies the arguments, the per-call context and the output collector
 * @throws IllegalStateException if no maximum can be located because the
 *                               normalized values contain NaN
 */
@Override
public void operate( FlowProcess flowProcess, FunctionCall<Context<BaseRegressionFunction.ExpressionContext>> functionCall ) {
  Context<BaseRegressionFunction.ExpressionContext> context = functionCall.getContext();
  TupleEntry arguments = functionCall.getArguments();

  ExpressionEvaluator[] expressions = context.payload.expressions;
  double[] results = context.payload.results;

  for( int i = 0; i < expressions.length; i++ ) {
    results[ i ] = expressions[ i ].calculate( arguments );
  }

  LOG.debug( "raw regression: {}", results );

  for( int i = 0; i < expressions.length; i++ ) {
    results[ i ] = getSpec().getLinkFunction().calculate( results[ i ] );
  }

  LOG.debug( "link regression: {}", results );

  results = getSpec().getNormalization().normalize( results );

  LOG.debug( "probabilities: {}", results );

  double max = Doubles.max( results );
  int index = Doubles.indexOf( results, max );

  // Doubles.max yields NaN as soon as one value is NaN, and Doubles.indexOf
  // never matches NaN, so fail with the offending values instead of letting
  // the array access below throw an opaque ArrayIndexOutOfBoundsException
  if( index < 0 ) {
    throw new IllegalStateException( "unable to select a category, probabilities contain NaN: [" + Doubles.join( ", ", results ) + "]" );
  }

  String category = expressions[ index ].getTargetCategory();

  LOG.debug( "category: {}", category );

  if( !getSpec().getModelSchema().isIncludePredictedCategories() ) {
    functionCall.getOutputCollector().add( context.result( category ) );
    return;
  }

  // slot 0 carries the category, the remaining slots carry the normalized values
  Tuple result = context.tuple;

  result.set( 0, category );

  for( int i = 0; i < results.length; i++ ) {
    result.set( i + 1, results[ i ] );
  }

  functionCall.getOutputCollector().add( result );
}
}