public static int getNumLeaves(AutoBuffer ab, int leftSize, boolean regression) { int result = 0; int startPos = ab.position(); while (ab.position() < startPos + leftSize) { byte currentNodeType = (byte) ab.get1(); if (currentNodeType == 'S' || currentNodeType == 'E') { ab.get2(); ab.get4f(); // skip col and split value. int skipSize = ab.get1(); if (skipSize == 0) { ab.get3();} } else if (currentNodeType == '[') { result ++; if (regression) ab.get4f(); else ab.get1(); } else { throw new NotImplementedException(); } } ab.position(startPos); // return to the original position so the buffer seems untouched. return result; }
public static int getSkip(AutoBuffer ab, int leftSize, boolean regression) { int numLeaves = 0; int numLeftLeaves = 0; int startPos = ab.position(); boolean prevIsS = false; while (ab.position() < startPos + leftSize) { byte currentNodeType = (byte) ab.get1(); if (currentNodeType == 'S' || currentNodeType == 'E') { ab.get2(); ab.get4f(); // skip col and split value. int skipSize = ab.get1(); if (skipSize == 0) { ab.get3();} prevIsS = true; } else if (currentNodeType == '[') { numLeaves ++; if (regression) ab.get4f(); else ab.get1(); if (prevIsS) numLeftLeaves++; prevIsS = false; } else { throw new NotImplementedException(); } } ab.position(startPos); return 2*numLeaves - numLeftLeaves; // only for regression tree. }
public static double classify( AutoBuffer ts, double[] ds, double badat, boolean regression ) { ts.get4(); // Skip tree-id ts.get8(); // Skip seed ts.get1(); // Skip producer id byte b; while( (b = (byte) ts.get1()) != '[' ) { // While not a leaf indicator assert b == '(' || b == 'S' || b == 'E'; int col = ts.get2(); // Column number in model-space float fcmp = ts.get4f(); // Float to compare against float fdat = Double.isNaN(ds[col]) ? fcmp - 1 : (float)ds[col]; int skip = (ts.get1()&0xFF); if( skip == 0 ) skip = ts.get3(); if (b == 'E') { if (fdat != fcmp) ts.position(ts.position() + skip); } else { // Picking right subtree? then skip left subtree if( fdat > fcmp ) ts.position(ts.position() + skip); } } if(regression) return ts.get4f(); return ts.get1()&0xFF; // Return the leaf's class }
case 0: skip = ab.get1(); break; case 1: skip = ab.get2(); break; case 2: skip = ab.get3(); break; case 3: skip = ab.get4(); break; case 16: skip = _nclass < 256?1:2; break; // Small leaf
skip = ab.get3(); } else {/* single byte for left size */ skip=skipSize; /* 1 byte to store skip*/}
public final TreeVisitor<T> visit() throws T { byte b = (byte) _ts.get1(); if( b == '[' ) { if (_regression) return leafFloat(_ts.get4f()); return leaf(_ts.get1()&0xFF); } assert b == '(' || b == 'S' || b =='E' : b; int off0 = _ts.position()-1; // Offset to start of *this* node int col = _ts.get2(); // Column number float fcmp = _ts.get4f(); // Float to compare against int skip = (_ts.get1()&0xFF); if( skip == 0 ) skip = _ts.get3(); int offl = _ts.position(); // Offset to start of *left* node int offr = _ts.position()+skip; // Offset to start of *right* node return pre(col,fcmp,off0,offl,offr).visit().mid(col,fcmp).visit().post(col,fcmp); } }
/** Classify this serialized tree - withOUT inflating it to a full tree. Use row 'row' in the dataset 'ary' (with pre-fetched bits 'databits') Returns classes from 0 to N-1*/ public static float classify( AutoBuffer ts, Chunk[] chks, int row, int modelDataMap[], short badData, boolean regression ) { ts.get4(); // Skip tree-id ts.get8(); // Skip seed ts.get1(); // Skip producer id byte b; while( (b = (byte) ts.get1()) != '[' ) { // While not a leaf indicator assert b == '(' || b == 'S' || b == 'E'; int col = modelDataMap[ts.get2()]; // Column number in model-space mapped to data-space float fcmp = ts.get4f(); // Float to compare against if( chks[col].isNA0(row) ) return badData; float fdat = (float)chks[col].at0(row); int skip = (ts.get1()&0xFF); if( skip == 0 ) skip = ts.get3(); if (b == 'E') { if (fdat != fcmp) ts.position(ts.position() + skip); } else { // Picking right subtree? then skip left subtree if( fdat > fcmp ) ts.position(ts.position() + skip); } } if (regression) { return ts.get4f(); } return (float)((short) ( ts.get1()&0xFF )); // Return the leaf's class }
case 0: skip = _ts.get1(); break; case 1: skip = _ts.get2(); break; case 2: skip = _ts.get3(); break; case 3: skip = _ts.get4(); break; case 48: skip = 4; break; // skip is always 4 for direct leaves (see DecidedNode.size() and LeafNode.size() methods)
case 0: skip = _ts.get1(); break; case 1: skip = _ts.get2(); break; case 2: skip = _ts.get3(); break; case 3: skip = _ts.get4(); break; case 16: skip = _ct._nclass < 256?1:2; break; // Small leaf
case 0: skip = _ts.get1(); break; case 1: skip = _ts.get2(); break; case 2: skip = _ts.get3(); break; case 3: skip = _ts.get4(); break; case 48: skip = 4; break; // skip is always 4 for direct leaves (see DecidedNode.size() and LeafNode.size() methods)