// Remove all cross-validation models (and their cached best models) from the K/V store.
public void delete_xval_models() {
  if (get_params().xval_models != null) {
    for (Key k : get_params().xval_models) {
      UKV.<DeepLearningModel>get(k).delete_best_model();
      UKV.<DeepLearningModel>get(k).delete();
    }
  }
}
try {
  final long now = System.currentTimeMillis();
  epoch_counter = (float) model_info().get_processed_total() / training_rows;
  final double time_last_iter_millis = now - _timeLastScoreEnter;
  // Auto-tune train_samples_per_iteration from the measured communication/computation ratio
  // (the ratio computation is assumed to match the newer excerpt further below).
  if (H2O.CLOUD.size() > 1 && get_params().train_samples_per_iteration == -2 && time_for_communication_us > 1e4) {
    final double comm_to_work_ratio = (time_for_communication_us * 1e-3) / time_last_iter_millis;
    final double correction = get_params().target_ratio_comm_to_comp / comm_to_work_ratio;
    actual_train_samples_per_iteration = Math.max(1, (long) (actual_train_samples_per_iteration / correction));
  }
  boolean keep_running = (epoch_counter < get_params().epochs);
  final long sinceLastScore = now - _timeLastScoreStart;
  final long sinceLastPrint = now - _timeLastPrintStart;
  final long samples = model_info().get_processed_total();
  if (!keep_running || sinceLastPrint > get_params().score_interval * 1000) {
    _timeLastPrintStart = now;
    Log.info("Training time: " + PrettyPrint.msecs(run_time, true));
  }
  if (!keep_running
      || (sinceLastScore > get_params().score_interval * 1000 //don't score too often
          && (double) (_timeLastScoreEnd - _timeLastScoreStart) / sinceLastScore < get_params().score_duty_cycle)) { //duty cycle
    final boolean printme = !get_params().quiet_mode;
    final boolean adaptCM = (isClassifier() && vadaptor.needsAdaptation2CM());
    _timeLastScoreStart = now;
    if (get_params().diagnostics) model_info().computeStats();
    Errors err = new Errors();
    err.training_time_ms = run_time;
    err.epoch_counter = epoch_counter;
    err.training_samples = model_info().get_processed_total();
    err.validation = ftest != null;
    err.score_training_samples = ftrain.numRows();
@Override protected final void execImpl() {
  if (dl_model == null) throw new IllegalArgumentException("Deep Learning Model must be specified.");
  DeepLearningModel dlm = UKV.get(dl_model);
  if (dlm == null) throw new IllegalArgumentException("Deep Learning Model not found.");
  StringBuilder sb = new StringBuilder();
  if (layer < -1 || layer > dlm.get_params().hidden.length - 1)
    throw new IllegalArgumentException("Layer must be either -1 or between 0 and " + (dlm.get_params().hidden.length - 1));
  if (layer == -1) layer = dlm.get_params().hidden.length - 1; // -1 means the last hidden layer
  int features = dlm.get_params().hidden[layer];
  sb.append("\nTransforming frame '" + source._key.toString() + "' with " + source.numCols()
      + " columns into " + features + " features with model '" + dl_model + "'\n");
  Frame df = dlm.scoreDeepFeatures(source, layer);
  sb.append("Storing the new features under: " + dest() + ".\n");
  Frame output = new Frame(dest(), df.names(), df.vecs());
  output.delete_and_lock(null);
  output.unlock(null);
}
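// Illustration (not from the H2O sources): scoreDeepFeatures conceptually runs the
// forward pass up to the chosen hidden layer and emits that layer's activations as
// the new feature columns. A self-contained sketch of that idea, with made-up
// weights and Tanh units:
public class DeepFeaturesSketch {
  // Activations of one hidden layer: a = tanh(W x + b).
  static double[] hiddenActivations(double[] x, double[][] W, double[] b) {
    double[] a = new double[b.length];
    for (int j = 0; j < b.length; j++) {
      double s = b[j];
      for (int i = 0; i < x.length; i++) s += W[j][i] * x[i];
      a[j] = Math.tanh(s);
    }
    return a;
  }
  public static void main(String[] args) {
    double[] x = {0.5, -1.0};                              // one input row, 2 columns
    double[][] W = {{0.1, 0.2}, {0.3, -0.4}, {-0.5, 0.6}}; // 3 hidden units
    double[] b = {0.0, 0.1, -0.1};
    double[] features = hiddenActivations(x, W, b);        // 2 inputs -> 3 "deep features"
    for (double f : features) System.out.println(f);
  }
}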
@Override protected void setCrossValidationError(Job.ValidatedJob job, double cv_error, ConfusionMatrix cm, AUCData auc, HitRatio hr) {
  _have_cv_results = true;
  if (!get_params().classification)
    last_scored().valid_mse = cv_error;
  else
    last_scored().valid_err = cv_error;
  last_scored().score_validation_samples = last_scored().score_training_samples / get_params().n_folds;
  last_scored().num_folds = get_params().n_folds;
  last_scored().valid_confusion_matrix = cm;
  last_scored().validAUC = auc;
  last_scored().valid_hitratio = hr;
  DKV.put(this._key, this); //overwrite this model
}
private double[] score_autoencoder(Chunk[] chks, int row_in_chunk, double[] tmp, double[] preds, Neurons[] neurons,
                                   boolean reconstruction, boolean reconstruction_error_per_feature) {
  assert (get_params()._autoencoder);
  assert (tmp.length == _output._names.length);
  for (int i = 0; i < tmp.length; i++)
    tmp[i] = chks[i].atd(row_in_chunk);
  score_autoencoder(tmp, preds, neurons, reconstruction, reconstruction_error_per_feature); // this fills preds, returns MSE error (ignored here)
  return preds;
}
private void putMeAsBestModel(Key bestModelKey) {
  final Key job = null;
  final DeepLearningModel cp = this;
  DeepLearningModel bestModel = new DeepLearningModel(cp, bestModelKey, job, model_info().data_info());
  bestModel.get_params().state = Job.JobState.DONE;
  bestModel.get_params().job_key = get_params().self();
  bestModel.delete_and_lock(job);
  bestModel.unlock(job);
  assert (UKV.get(bestModelKey) != null);
  assert (bestModel.compareTo(this) <= 0);
  assert (((DeepLearningModel) UKV.get(bestModelKey)).error() == _bestError);
}
final long now = System.currentTimeMillis();
final double time_since_last_iter = now - _timeLastIterationEnter;
updateTiming(jobKey);
_timeLastIterationEnter = now;
epoch_counter = (double) model_info().get_processed_total() / training_rows;
if (H2O.CLOUD.size() > 1 && get_params()._train_samples_per_iteration == -2 && iteration > 1) {
  Log.debug("Auto-tuning train_samples_per_iteration.");
  if (time_for_communication_us > 1e4) {
    final double comm_to_work_ratio = (time_for_communication_us * 1e-3) / time_since_last_iter;
    Log.debug("  Ratio of network communication to computation: " + String.format("%.5f", comm_to_work_ratio));
    Log.debug("  target_comm_to_work: " + get_params()._target_ratio_comm_to_comp);
    Log.debug("Old value of train_samples_per_iteration: " + actual_train_samples_per_iteration);
    double correction = get_params()._target_ratio_comm_to_comp / comm_to_work_ratio;
    correction = Math.max(0.5, Math.min(2, correction)); //it's ok to train up to 2x more training rows per iteration, but not fewer than half.
    if (Math.abs(correction) < 0.8 || Math.abs(correction) > 1.2) { //don't correct unless it's significant (avoid slow drift)
      // assumed adjustment (the body of this branch was cut off in the excerpt):
      // dividing by the clamped correction moves the comm/comp ratio toward the target
      actual_train_samples_per_iteration = Math.max(1, (long) (actual_train_samples_per_iteration / correction));
      Log.debug("New value of train_samples_per_iteration: " + actual_train_samples_per_iteration);
    }
  }
}
boolean keep_running = (epoch_counter < get_params()._epochs) && !stopped_early;
final long sinceLastScore = now - _timeLastScoreStart;
if (!keep_running || get_params()._score_each_iteration
    || (sinceLastScore > get_params()._score_interval * 1000 //don't score too often
        && (double) (_timeLastScoreEnd - _timeLastScoreStart) / sinceLastScore < get_params()._score_duty_cycle)) { //duty cycle
  jobKey.get().update(0, "Scoring on " + fTrain.numRows() + " training samples"
      + (fValid != null ? (", " + fValid.numRows() + " validation samples") : ""));
  final boolean printme = !get_params()._quiet_mode;
  _timeLastScoreStart = System.currentTimeMillis();
  model_info().computeStats(); //might not be necessary, but is done to be certain that numbers are good
  DeepLearningScoringInfo scoringInfo = new DeepLearningScoringInfo();
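// Illustration (not from the H2O sources): the auto-tuning above reduces to a small
// piece of arithmetic - measure the fraction of each iteration spent communicating,
// compare it to the target ratio, and rescale the per-iteration sample count, clamped
// so a single adjustment never more than doubles or halves it. All names below are
// illustrative; only the formula mirrors the code above.
public class AutoTuneSketch {
  static long retune(long samplesPerIteration,
                     double commMicros,         // time spent in network communication (us)
                     double iterMillis,         // wall-clock time of the whole iteration (ms)
                     double targetCommToComp) { // desired communication/computation ratio
    double commToWork = (commMicros * 1e-3) / iterMillis;  // observed ratio
    double correction = targetCommToComp / commToWork;     // <1 means too much communication
    correction = Math.max(0.5, Math.min(2, correction));   // clamp to [0.5, 2]
    if (correction < 0.8 || correction > 1.2)              // ignore insignificant drift
      samplesPerIteration = Math.max(1, (long) (samplesPerIteration / correction));
    return samplesPerIteration;
  }
  public static void main(String[] args) {
    // Communication dominates (100 ms of a 200 ms iteration vs. a 5% target):
    // the correction clamps to 0.5 and the per-iteration sample count doubles.
    System.out.println(retune(100_000, 100_000 /*us*/, 200 /*ms*/, 0.05)); // prints 200000
  }
}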
sb.append("Reported mean reconstruction error: " + mymodel.mse() + "\n");
final Frame rec = mymodel.scoreAutoEncoder(train, Key.make(), true);
sb.append("Reconstruction error per feature: " + rec.toString() + "\n");
rec.remove();
final Frame l2 = mymodel.scoreAutoEncoder(train, Key.make(), false);
final Vec l2vec = l2.anyVec();
sb.append("Actual mean reconstruction error: " + l2vec.mean() + "\n");
sb.append("The following training points are reconstructed with an error above the "
    + quantile * 100 + "-th percentile - potential \"outliers\" in testing data.\n");
double thresh = mymodel.calcOutlierThreshold(l2vec, quantile);
for (long i = 0; i < l2vec.length(); i++) {
  if (l2vec.at(i) > thresh) {
    sb.append(String.format("row %d : error = %5f\n", i, l2vec.at(i))); // log each flagged outlier row
  }
}
Assert.assertEquals(l2vec.mean(), mymodel.mse(), 1e-8 * mymodel.mse());
final Frame recon_train = mymodel.score(train);
Assert.assertTrue(mymodel.testJavaScoring(train, recon_train, 1e-5));
final Frame df1 = mymodel.scoreDeepFeatures(train, 0);
Assert.assertEquals(10, df1.numCols());
Assert.assertEquals(train.numRows(), df1.numRows());
final Frame df2 = mymodel.scoreDeepFeatures(train, 1);
Assert.assertEquals(5, df2.numCols());
Assert.assertEquals(train.numRows(), df2.numRows());
final long model_size = model.model_info().size();
Log.info("Number of model parameters (weights/biases): " + String.format("%,d", model_size));
model.write_lock(_job);
_job.update(0, "Setting up training data...");
final DeepLearningParameters mp = model.model_info().get_params();
if (!mp._quiet_mode) Log.info("Scoring the null model of the autoencoder.");
model.doScoring(trainScoreFrame, validScoreFrame, _job._key, 0, false); //get the null model reconstruction error
model.update(_job);
final long now = System.currentTimeMillis();
model.total_setup_time_ms += now - _job.start_time();
Log.info("Total setup time: " + PrettyPrint.msecs(model.total_setup_time_ms, true));
// main training loop (loop header reconstructed: it was cut off in the excerpt, but the
// break statements below require one)
for (;;) {
  model.set_model_info(mp._epochs == 0 ? model.model_info() :
      H2O.CLOUD.size() > 1 && mp._replicate_training_data ? (mp._single_node_mode ?
          new DeepLearningTask2(_job._key, train, model.model_info(), rowFraction(train, mp, model), model.iterations).doAll(Key.make(H2O.SELF)).model_info() : //replicated data + single node mode
          new DeepLearningTask2(_job._key, train, model.model_info(), rowFraction(train, mp, model), model.iterations).doAllNodes().model_info()) : //replicated data + multi-node mode
      new DeepLearningTask(_job._key, model.model_info(), rowFraction(train, mp, model), model.iterations).doAll(train).model_info()); //distributed data (always in multi-node mode)
  if (stop_requested() && !timeout()) throw new Job.JobCancelledException();
  if (!model.doScoring(trainScoreFrame, validScoreFrame, _job._key, model.iterations, false))
    break; //finished training (or early stopping or convergence)
  if (timeout()) { //stop after scoring
    _job.update((long) (mp._epochs * train.numRows())); // mark progress as completed
    break;
  }
}
if (best_model != null && best_model.loss() < model.loss()
    && Arrays.equals(best_model.model_info().units, model.model_info().units)) {
  if (!_parms._quiet_mode) {
    Log.info("Setting the model to be the best model so far (based on scoring history).");
    Log.info("Best model's loss: " + best_model.loss() + " vs this model's loss (before overwriting it with the best model): " + model.loss());
  }
  DeepLearningModelInfo mi = IcedUtils.deepCopy(best_model.model_info());
@Test public void testNoHiddenLayerRegression() {
  Frame tfr = null;
  DeepLearningModel dl = null;
  DeepLearningModel dl2 = null;
  try {
    tfr = parse_test_file("./smalldata/logreg/prostate.csv");
    DeepLearningParameters parms = new DeepLearningParameters();
    parms._train = tfr._key;
    parms._epochs = 1000;
    parms._response_column = "AGE";
    parms._hidden = new int[]{}; // no hidden layers: a purely linear model
    dl = new DeepLearning(parms).trainModel().get();
    Frame res = dl.score(tfr);
    assertTrue(dl.testJavaScoring(tfr, res, 1e-5));
    res.remove();
  } finally {
    if (tfr != null) tfr.delete();
    if (dl != null) dl.delete();
    if (dl2 != null) dl2.delete();
  }
}
sb.append("Verifying results.\n");
DeepLearningModel mymodel = UKV.get(p.dest());
sb.append("Reported mean reconstruction error: " + mymodel.mse() + "\n");
final Frame l2 = mymodel.scoreAutoEncoder(train);
final Vec l2vec = l2.anyVec();
sb.append("Actual mean reconstruction error: " + l2vec.mean() + "\n");
sb.append("The following training points are reconstructed with an error above the "
    + quantile * 100 + "-th percentile - potential \"outliers\" in testing data.\n");
double thresh = mymodel.calcOutlierThreshold(l2vec, quantile);
for (long i = 0; i < l2vec.length(); i++) {
  if (l2vec.at(i) > thresh) {
    sb.append(String.format("row %d : error = %5f\n", i, l2vec.at(i))); // log each flagged outlier row
  }
}
Assert.assertEquals(mymodel.mse(), l2vec.mean(), 1e-8);
final Frame recon_train = mymodel.score(train);
// cleanup
train.delete();
p.delete();
mymodel.delete();
l2.delete();
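// Illustration (not from the H2O sources): both excerpts above flag rows whose
// reconstruction error exceeds a quantile of the training-error distribution, which
// is all calcOutlierThreshold has to provide. A self-contained sketch of that
// thresholding step (simple sort-based empirical quantile; names are illustrative):
import java.util.Arrays;

public class OutlierThresholdSketch {
  static double quantileThreshold(double[] errors, double quantile) {
    double[] sorted = errors.clone();
    Arrays.sort(sorted);
    int idx = (int) Math.floor(quantile * (sorted.length - 1)); // nearest-rank style index
    return sorted[idx];
  }
  public static void main(String[] args) {
    double[] mse = {0.010, 0.012, 0.015, 0.020, 0.500}; // last row reconstructs poorly
    double thresh = quantileThreshold(mse, 0.9);        // 0.020 for this toy data
    for (int i = 0; i < mse.length; i++)
      if (mse[i] > thresh)
        System.out.println("row " + i + " is a potential outlier: " + mse[i]);
  }
}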
List<Key> removeMe = new ArrayList<>();
if (_parms._checkpoint == null) {
  cp = new DeepLearningModel(dest(), _parms, new DeepLearningModel.DeepLearningModelOutput(DeepLearning.this), _train, _valid, nclasses());
  if (_parms._pretrained_autoencoder != null) {
    final DeepLearningModel pretrained = DKV.getGet(_parms._pretrained_autoencoder);
    // guard reconstructed from the exception message: pretraining requires an
    // unsupervised (autoencoder) source model and a supervised target model
    if (_parms._autoencoder || pretrained == null || !pretrained._parms._autoencoder)
      throw new H2OIllegalArgumentException("The pretrained model must be unsupervised (an autoencoder), and the model to be trained must be supervised.");
    Log.info("Loading model parameters of input and hidden layers from the pretrained autoencoder model.");
    cp.model_info().initializeFromPretrainedModel(pretrained.model_info());
  } else {
    cp.model_info().initializeMembers(_parms._initial_weights, _parms._initial_biases);
  }
} else {
  // checkpoint restart: continue training from the previous model
  final DeepLearningModel previous = DKV.getGet(_parms._checkpoint);
  for (String st : previous.adaptTestForTrain(_train, true, false)) Log.warn(st);
  for (String st : previous.adaptTestForTrain(_valid, true, false)) Log.warn(st);
  DataInfo dinfo = makeDataInfo(_train, _valid, _parms, nclasses());
  cp = new DeepLearningModel(dest(), _parms, previous, false, dinfo);
  cp.write_lock(_job);
  if (dinfo.fullN() != previous.model_info().data_info().fullN())
    throw new H2OIllegalArgumentException("Total number of predictors is different than for the checkpointed model.");
  final DeepLearningParameters actualParms = cp.model_info().get_params(); //actually used parameters for model building (defaults filled in, etc.)
  assert (actualParms != previous.model_info().get_params());
  assert (actualParms != _parms);
  assert (actualParms != previous._parms);
  DeepLearningParameters.Sanity.updateParametersDuringCheckpointRestart(_parms /*user-given*/, cp.model_info().get_params() /*model_info.parameters that will be used*/, true /*doIt*/, true /*quiet*/);
model.write_lock(self());
final DeepLearning mp = model.model_info().get_params(); //use the model's parameters for everything below - NOT the job's parameters (can be different after checkpoint restart)
final long model_size = model.model_info().size();
if (!quiet_mode) Log.info("Number of model parameters (weights/biases): " + String.format("%,d", model_size));
train = model.model_info().data_info()._adaptedFrame;
if (mp.force_load_balance) train = updateFrame(train, reBalance(train, mp.replicate_training_data));
if (mp.classification && mp.balance_classes) {
  model.setModelClassDistribution(new MRUtils.ClassDist(train.lastVec()).doAll(train.lastVec()).rel_dist());
}
if (!mp.quiet_mode) Log.info("Initial model:\n" + model.model_info());
if (autoencoder) model.doScoring(train, trainScoreFrame, validScoreFrame, self(), getValidAdaptor()); //get the null model reconstruction error
model.update(self());
Log.info("Starting to train the Deep Learning model.");
do {
  model.set_model_info(H2O.CLOUD.size() > 1 && mp.replicate_training_data ? (mp.single_node_mode ?
      new DeepLearningTask2(train, model.model_info(), rowFraction(train, mp, model)).invoke(Key.make()).model_info() : //replicated data + single node mode
      new DeepLearningTask2(train, model.model_info(), rowFraction(train, mp, model)).invokeOnAllNodes().model_info()) : //replicated data + multi-node mode
      new DeepLearningTask(model.model_info(), rowFraction(train, mp, model)).doAll(train).model_info()); //distributed data (always in multi-node mode)
} while (model.doScoring(train, trainScoreFrame, validScoreFrame, self(), getValidAdaptor()));
if (best_model != null && best_model.error() < model.error()
    && Arrays.equals(best_model.model_info().units, model.model_info().units)) {
  Log.info("Setting the model to be the best model so far (based on scoring history).");
  DeepLearningModel.DeepLearningModelInfo mi = best_model.model_info().deep_clone();
  mi.set_processed_global(model.model_info().get_processed_global());
  mi.set_processed_local(model.model_info().get_processed_local());
if (checkpoint == null) {
  cp = initModel();
  cp.start_training(null);
} else {
  final DeepLearningModel previous = UKV.get(checkpoint);
  ((ValidatedJob) previous.job()).xval_models = null; //remove existing cross-validation keys after checkpoint restart
  if (source == null || (previous.model_info().get_params().source != null
      && !Arrays.equals(source._key._kb, previous.model_info().get_params().source._key._kb))) {
    throw new IllegalArgumentException("source must be the same as for the checkpointed model.");
  }
  autoencoder = previous.model_info().get_params().autoencoder;
  if (!autoencoder && (response == null || !source.names()[source.find(response)].equals(previous.responseName()))) {
    throw new IllegalArgumentException("response must be the same as for the checkpointed model.");
  }
  if (Utils.difference(ignored_cols, previous.model_info().get_params().ignored_cols).length != 0
      || Utils.difference(previous.model_info().get_params().ignored_cols, ignored_cols).length != 0) {
    ignored_cols = previous.model_info().get_params().ignored_cols;
    Log.warn("Automatically re-using ignored_cols from the checkpointed model.");
  }
  if (classification != previous.model_info().get_params().classification) {
    Log.warn("Automatically switching to " + ((classification = !classification) ? "classification" : "regression") + " (same as the checkpointed model).");
  }
  try {
    final DataInfo dataInfo = prepareDataInfo();
    cp = new DeepLearningModel(previous, destination_key, job_key, dataInfo);
    cp.write_lock(self());
    cp.start_training(previous);
    assert (state == JobState.RUNNING);
Neurons[] neurons = DeepLearningTask.makeNeuronsForTraining(mymodel.model_info());
Assert.assertTrue(mymodel.model_info().get_processed_total() == epoch * fr.numRows());
neurons = DeepLearningTask.makeNeuronsForTesting(mymodel.model_info()); //link the weights to the neurons, for easy access
Neurons l = neurons[1];
for (int o = 0; o < l._a.size(); o++) {
  // (activation checks elided in this excerpt)
}
Frame fpreds = mymodel.score(_test); //[0] is label, [1]...[4] are the probabilities
Frame trainPredict = mymodel.score(_train, false);
final double myTrainErr = mymodel.calcError(_train, _train.lastVec(), trainPredict, trainPredict, "Final training error:",
    true, p.max_confusion_matrix_size, new water.api.ConfusionMatrix(), null, null);
Frame testPredict = mymodel.score(_test, false);
final double myTestErr = mymodel.calcError(_test, _test.lastVec(), testPredict, testPredict, "Final testing error:",
    true, p.max_confusion_matrix_size, new water.api.ConfusionMatrix(), null, null);
Log.info("H2O training error : " + myTrainErr * 100 + "%, test error: " + myTestErr * 100 + "%");
float best_err = Float.MAX_VALUE; // declaration missing from the excerpt
for (DeepLearningModel.Errors err : mymodel.scoring_history()) {
  best_err = Math.min(best_err, (float) err.train_err); //multi-class classification
}
Frame bestPredict = null;
try {
  bestPredict = mymodel.score(_train, false);
  final double bestErr = mymodel.calcError(_train, _train.lastVec(), bestPredict, bestPredict, "Best error:",
      true, p.max_confusion_matrix_size, new water.api.ConfusionMatrix(), null, null);
  Log.info("Best_model's error : " + bestErr * 100 + "%.");
  mymodel.delete_best_model();
try {
  model1 = dl.trainModel().get();
  checkSums.add(model1.checksum());
  testcount++;
} catch (Throwable t) {
  t.printStackTrace();
  throw new RuntimeException(t);
}
if (p._train_samples_per_iteration == 0) {
  // condition reconstructed from the assert message (the original expression was cut
  // off in the excerpt): with one epoch per iteration, the first scoring entry should
  // land near 1 epoch
  assert ((double) model1._output._scoring_history.get(1, 3) <= 1)
      : ("First scoring at " + model1._output._scoring_history.get(1, 3) + " epochs, should be closer to 1!" + "\n" + model1.toString());
} else if (p._train_samples_per_iteration == -1) {
  assert (model1.model_info().get_params()._l1 == 0);
  assert (model1.model_info().get_params()._l2 == 0);
}
Assert.assertFalse(model1._output._job.isCrashed());
Assert.assertTrue(model1.model_info().get_processed_total() >= frame.numRows() * epochs);
assert (model1.model_info().get_params() != model2.model_info().get_params());
assert (model1.model_info().get_params()._l1 == 0);
assert (model1.model_info().get_params()._l2 == 0);
Assert.assertTrue(model2.model_info().get_processed_total() >= frame.numRows() * 2 * epochs);
assert (p != model1.model_info().get_params());
assert (p2 != model2.model_info().get_params());
assert (p2._hidden_dropout_ratios == null);
if (p._activation.toString().contains("WithDropout")) {
  assert (model1.model_info().get_params()._hidden_dropout_ratios != null);
  assert (model2.model_info().get_params()._hidden_dropout_ratios != null);
Frame p = dl.score(tfr);
hex.ModelMetrics mm = hex.ModelMetrics.getFromDKV(dl, tfr);
double resdev = ((ModelMetricsRegression) mm)._mean_residual_deviance;
DeepLearningModelInfo modelInfo = IcedUtils.deepCopy(dl.model_info()); //golden version
long before = dl.model_info().checksum_impl();
dl.set_model_info(IcedUtils.deepCopy(modelInfo));
final DataInfo di = dl.model_info().data_info();
long cs = dl.model_info().checksum_impl();
double loss = dl.meanLoss(rowsMiniBatch);
assert (cs == before);
assert (before == dl.model_info().checksum_impl());
double meanLoss = 0; // accumulated below (declaration missing from the excerpt)
meanLoss += loss;
int rows = dl.model_info().get_weights(layer).rows();
assert (dl.model_info().get_biases(layer).size() == rows);
for (int row = 0; row < rows; ++row) {
  dl.set_model_info(IcedUtils.deepCopy(modelInfo)); // reset to the golden version before perturbing this row's weights
  Neurons[] neurons = DeepLearningTask.makeNeuronsForTraining(dl.model_info());
  double[] responses = new double[miniBatchSize];
  double[] offsets = new double[miniBatchSize];
  // n = number of filled minibatch rows; its setup is elided in this excerpt
  DeepLearningTask.fpropMiniBatch(-1 /*seed doesn't matter*/, neurons, dl.model_info(), null, true /*training*/, responses, offsets, n);
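// Illustration (not from the H2O sources): the test above is a gradient check - it
// re-evaluates the mean loss under small weight perturbations and compares the result
// against the backpropagated derivative. The core idea in isolation, using central
// differences on a toy loss f(w) = (w - 3)^2 with known derivative f'(w) = 2(w - 3):
public class GradientCheckSketch {
  static double loss(double w) { return (w - 3) * (w - 3); }
  static double analyticGrad(double w) { return 2 * (w - 3); }
  public static void main(String[] args) {
    double w = 1.7, h = 1e-5;
    double numeric = (loss(w + h) - loss(w - h)) / (2 * h); // central difference
    double relErr = Math.abs(numeric - analyticGrad(w)) / Math.max(1e-12, Math.abs(analyticGrad(w)));
    System.out.println("numeric=" + numeric + " analytic=" + analyticGrad(w) + " relErr=" + relErr);
    if (relErr > 1e-6) throw new AssertionError("gradient check failed");
  }
}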
public DeepLearningMojoWriter(DeepLearningModel model) {
  super(model);
  _parms = model.get_params();
  _model_info = model.model_info();
  _output = model._output;
  if (_model_info.isUnstable()) { // do not generate mojo for unstable model
    throw new UnsupportedOperationException(technote(4, "Refusing to create a MOJO for an unstable model."));
  }
}
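// Hedged usage sketch (assumed API, not from this excerpt): H2O-3 models expose
// getMojo(), and the resulting ModelMojoWriter can stream the MOJO zip via
// writeTo(OutputStream). Given the guard in the constructor above, this throws
// for unstable models.
class MojoExportSketch {
  static void exportMojo(DeepLearningModel model, String path) throws java.io.IOException {
    try (java.io.FileOutputStream os = new java.io.FileOutputStream(path)) {
      model.getMojo().writeTo(os);
    }
  }
}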
Neurons[] neurons = DeepLearningTask.makeNeuronsForTraining(mymodel.model_info());
mymodel.delete();
DeepLearning dl = new DeepLearning(p);
mymodel = dl.trainModel().get();
Assert.assertTrue(mymodel.model_info().get_processed_total() == epoch * dl.train().numRows());
neurons = DeepLearningTask.makeNeuronsForTesting(mymodel.model_info()); //link the weights to the neurons, for easy access
Neurons l = neurons[1];
for (int o = 0; o < l._a[0].size(); o++) {
  // (activation checks elided in this excerpt)
}
Frame fpreds = mymodel.score(_test); //[0] is label, [1]...[4] are the probabilities
Frame trainPredict = mymodel.score(_train);
Frame testPredict = mymodel.score(_test);
hex.ModelMetrics mmtrain = hex.ModelMetrics.getFromDKV(mymodel, _train);
hex.ModelMetrics mmtest = hex.ModelMetrics.getFromDKV(mymodel, _test);
float best_err = Float.MAX_VALUE; // declaration missing from the excerpt
for (ScoringInfo e : mymodel.scoring_history()) {
  DeepLearningScoringInfo err = (DeepLearningScoringInfo) e;
  best_err = Math.min(best_err, (float) (Double.isNaN(err.scored_train._classError) ? best_err : err.scored_train._classError)); //multi-class classification
}
Frame bestPredict = null;
try {
  bestPredict = mymodel.score(_train);
  hex.ModelMetrics mmbest = hex.ModelMetrics.getFromDKV(mymodel, _train);
  final double bestErr = mmbest.cm().err();
  mymodel.delete();