public static int getNumLeaves(AutoBuffer ab, int leftSize, boolean regression) { int result = 0; int startPos = ab.position(); while (ab.position() < startPos + leftSize) { byte currentNodeType = (byte) ab.get1(); if (currentNodeType == 'S' || currentNodeType == 'E') { ab.get2(); ab.get4f(); // skip col and split value. int skipSize = ab.get1(); if (skipSize == 0) { ab.get3();} } else if (currentNodeType == '[') { result ++; if (regression) ab.get4f(); else ab.get1(); } else { throw new NotImplementedException(); } } ab.position(startPos); // return to the original position so the buffer seems untouched. return result; }
public static int getSkip(AutoBuffer ab, int leftSize, boolean regression) { int numLeaves = 0; int numLeftLeaves = 0; int startPos = ab.position(); boolean prevIsS = false; while (ab.position() < startPos + leftSize) { byte currentNodeType = (byte) ab.get1(); if (currentNodeType == 'S' || currentNodeType == 'E') { ab.get2(); ab.get4f(); // skip col and split value. int skipSize = ab.get1(); if (skipSize == 0) { ab.get3();} prevIsS = true; } else if (currentNodeType == '[') { numLeaves ++; if (regression) ab.get4f(); else ab.get1(); if (prevIsS) numLeftLeaves++; prevIsS = false; } else { throw new NotImplementedException(); } } ab.position(startPos); return 2*numLeaves - numLeftLeaves; // only for regression tree. }
public static double classify( AutoBuffer ts, double[] ds, double badat, boolean regression ) { ts.get4(); // Skip tree-id ts.get8(); // Skip seed ts.get1(); // Skip producer id byte b; while( (b = (byte) ts.get1()) != '[' ) { // While not a leaf indicator assert b == '(' || b == 'S' || b == 'E'; int col = ts.get2(); // Column number in model-space float fcmp = ts.get4f(); // Float to compare against float fdat = Double.isNaN(ds[col]) ? fcmp - 1 : (float)ds[col]; int skip = (ts.get1()&0xFF); if( skip == 0 ) skip = ts.get3(); if (b == 'E') { if (fdat != fcmp) ts.position(ts.position() + skip); } else { // Picking right subtree? then skip left subtree if( fdat > fcmp ) ts.position(ts.position() + skip); } } if(regression) return ts.get4f(); return ts.get1()&0xFF; // Return the leaf's class }
/** Classify this serialized tree - withOUT inflating it to a full tree. Use row 'row' in the dataset 'ary' (with pre-fetched bits 'databits') Returns classes from 0 to N-1*/ public static float classify( AutoBuffer ts, Chunk[] chks, int row, int modelDataMap[], short badData, boolean regression ) { ts.get4(); // Skip tree-id ts.get8(); // Skip seed ts.get1(); // Skip producer id byte b; while( (b = (byte) ts.get1()) != '[' ) { // While not a leaf indicator assert b == '(' || b == 'S' || b == 'E'; int col = modelDataMap[ts.get2()]; // Column number in model-space mapped to data-space float fcmp = ts.get4f(); // Float to compare against if( chks[col].isNA0(row) ) return badData; float fdat = (float)chks[col].at0(row); int skip = (ts.get1()&0xFF); if( skip == 0 ) skip = ts.get3(); if (b == 'E') { if (fdat != fcmp) ts.position(ts.position() + skip); } else { // Picking right subtree? then skip left subtree if( fdat > fcmp ) ts.position(ts.position() + skip); } } if (regression) { return ts.get4f(); } return (float)((short) ( ts.get1()&0xFF )); // Return the leaf's class }
public final TreeVisitor<T> visit() throws T { byte b = (byte) _ts.get1(); if( b == '[' ) { if (_regression) return leafFloat(_ts.get4f()); return leaf(_ts.get1()&0xFF); } assert b == '(' || b == 'S' || b =='E' : b; int off0 = _ts.position()-1; // Offset to start of *this* node int col = _ts.get2(); // Column number float fcmp = _ts.get4f(); // Float to compare against int skip = (_ts.get1()&0xFF); if( skip == 0 ) skip = _ts.get3(); int offl = _ts.position(); // Offset to start of *left* node int offr = _ts.position()+skip; // Offset to start of *right* node return pre(col,fcmp,off0,offl,offr).visit().mid(col,fcmp).visit().post(col,fcmp); } }
assert (equal >= 0 && equal <= 3): "illegal equal value " + equal+" at "+ab.position()+" in bitpile "+Arrays.toString(_bits); case 16: skip = _nclass < 256?1:2; break; // Small leaf case 48: skip = 4; break; // skip the prediction default: assert false:"illegal lmask value " + lmask+" at "+ab.position()+" in bitpile "+Arrays.toString(_bits); ( equal==1 && ((float)row[colId]) == splitVal) || ( (equal==2 || equal==3) && grpContains )) { ab.position(ab.position()+skip); // Skip to the right subtree
static public Key make(byte[] kb, byte rf, byte systemType, H2ONode... replicas) { // no more than 3 replicas allowed to be stored in the key assert 0 <=replicas.length && replicas.length<=3; assert systemType<32; // only system keys allowed // Key byte layout is: // 0 - systemType, from 0-31 // 1 - replica-count, plus up to 3 bits for ip4 vs ip6 // 2-n - zero, one, two or 3 IP4 (4+2 bytes) or IP6 (16+2 bytes) addresses // 2-5- 4 bytes of chunk#, or -1 for masters // n+ - repeat of the original kb AutoBuffer ab = new AutoBuffer(); ab.put1(systemType).put1(replicas.length); for( H2ONode h2o : replicas ) h2o.write(ab); ab.put4(-1); ab.putA1(kb,kb.length); return make(Arrays.copyOf(ab.buf(),ab.position()),rf); }
while (ab.position() < cap) { byte _nodeType = 0; byte currentNodeType = (byte) ab.get1(); } else {/* single byte for left size */ skip=skipSize; /* 1 byte to store skip*/} int currentPosition = ab.position(); byte leftType = (byte) ab.get1(); ab.position(currentPosition+skip); // jump to the right child. byte rightType = (byte) ab.get1(); ab.position(currentPosition); if (leftType == '[') { _nodeType |= 0x30; } if (rightType == '[') { _nodeType |= 0xC0; }
public TreeModel.CompressedTree compress() { // Log.info(Sys.RANDF, _tree.toString(new StringBuilder(), Integer.MAX_VALUE).toString()); int size = _tree.dtreeSize(); if (_tree instanceof LeafNode) { size += 3; } AutoBuffer ab = new AutoBuffer(size); if( _tree instanceof LeafNode) ab.put1(0).put2((char)65535); _tree.compress(ab); assert ab.position() == size: "Actual size doesn't agree calculated size."; char _nclass = (char)_data.classes(); return new TreeModel.CompressedTree(ab.buf(),_nclass,_seed); }
@Override public AutoBuffer compress(AutoBuffer ab) { int pos = ab.position(); if( _nodeType == 0 ) size(); // Sets _nodeType & _size both Node rite = _tree.node(_nids[1]); rite.compress(ab); assert _size == ab.position()-pos:"reported size = " + _size + " , real size = " + (ab.position()-pos); return ab;
while (ab.position() < cap) { byte currentNodeType = (byte) ab.get1(); if (currentNodeType == 'S') {
public TreeModel.CompressedTree compress() { int sz = root().size(); if( root() instanceof LeafNode ) sz += 3; // Oops - tree-stump AutoBuffer ab = new AutoBuffer(sz); if( root() instanceof LeafNode ) // Oops - tree-stump ab.put1(0).put2((char)65535); // Flag it special so the decompress doesn't look for top-level decision root().compress(ab); // Compress whole tree assert ab.position() == sz; return new TreeModel.CompressedTree(ab.buf(),_nclass,_seed); } /** Save this tree into DKV store under default random Key. */
public CompressedTree compress(int tid, int cls, String[][] domains) { int sz = root().size(); if( root() instanceof LeafNode ) sz += 3; // Oops - tree-stump AutoBuffer ab = new AutoBuffer(sz); _abAux = new AutoBuffer(); if( root() instanceof LeafNode ) // Oops - tree-stump ab.put1(0).put2((char)65535); // Flag it special so the decompress doesn't look for top-level decision root().compress(ab, _abAux); // Compress whole tree assert ab.position() == sz; return new CompressedTree(ab.buf(), _seed,tid,cls); }
@Override public AutoBuffer compress(AutoBuffer ab, AutoBuffer abAux) { int pos = ab.position(); if( _nodeType == 0 ) size(); // Sets _nodeType & _size both Node rite = _tree.node(_nids[1]); rite.compress(ab, abAux); assert _size == ab.position()-pos:"reported size = " + _size + " , real size = " + (ab.position()-pos); return ab;
public CompressedTree compress(int tid, int cls, String[][] domains) { int sz = root().size(); if( root() instanceof LeafNode ) sz += 3; // Oops - tree-stump AutoBuffer ab = new AutoBuffer(sz); _abAux = new AutoBuffer(); if( root() instanceof LeafNode ) // Oops - tree-stump ab.put1(0).put2((char)65535); // Flag it special so the decompress doesn't look for top-level decision root().compress(ab, _abAux); // Compress whole tree assert ab.position() == sz; return new CompressedTree(ab.buf(), _seed,tid,cls); }
@Override public AutoBuffer compress(AutoBuffer ab, AutoBuffer abAux) { int pos = ab.position(); if( _nodeType == 0 ) size(); // Sets _nodeType & _size both Node rite = _tree.node(_nids[1]); rite.compress(ab, abAux); assert _size == ab.position()-pos:"reported size = " + _size + " , real size = " + (ab.position()-pos); return ab;