/**
 * Assigns a new name to a DDF and records the name-to-UUID mapping in {@code mNames}.
 *
 * <p>If the DDF already carries a non-empty name, that name's entry is removed from
 * {@code mNames} before the new one is stored.
 *
 * @param ddf  the DDF to rename
 * @param name the new name; must be neither {@code null} nor empty
 * @throws DDFException if {@code name} is {@code null} or empty
 */
public synchronized void setDDFName(DDF ddf, String name) throws DDFException {
  // Reject unusable names up front, before touching any state.
  if (Strings.isNullOrEmpty(name)) {
    throw new DDFException("DDF's name cannot be null or empty");
  }

  String previousName = ddf.getName();
  if (!Strings.isNullOrEmpty(previousName)) {
    this.mNames.remove(previousName);
  }

  ddf.setName(name);
  this.mNames.put(name, ddf.getUUID());
}
/**
 * Builds a subset via {@code _subset} and then calls {@code copyFactor} on the result's
 * metadata handler, passing this handler's DDF and that DDF's column names.
 *
 * @param columnExpr the columns forwarded to {@code _subset}
 * @param filter     the filter expression forwarded to {@code _subset}
 * @return the DDF produced by {@code _subset}
 * @throws DDFException if building the subset or copying factor metadata fails
 */
@Override
public DDF subset(List<Column> columnExpr, Expression filter) throws DDFException {
  DDF result = _subset(columnExpr, filter);
  DDF source = this.getDDF();
  result.getMetaDataHandler().copyFactor(source, source.getColumnNames());
  return result;
}
/**
 * Delegates persistence to the persistence handler.
 *
 * @param doOverwrite flag passed through unchanged to the handler's {@code persist}
 * @return the {@link PersistenceUri} returned by the handler
 * @throws DDFException if the handler fails
 */
@Override
public PersistenceUri persist(boolean doOverwrite) throws DDFException {
  PersistenceUri uri = this.getPersistenceHandler().persist(doOverwrite);
  return uri;
}
/**
 * Asks the statistics supporter for a five-number summary, passing the names returned by
 * {@code getColumnNames()}.
 *
 * @return the array returned by the statistics supporter
 * @throws DDFException if the statistics supporter fails
 */
public FiveNumSummary[] getFiveNumSummary() throws DDFException {
  FiveNumSummary[] summaries =
      this.getStatisticsSupporter().getFiveNumSummary(this.getColumnNames());
  return summaries;
}
/**
 * Checks that the "year" and "month" columns are still classified as FACTOR, and still
 * carry factor levels, after a {@code transformUDF} call made while the DDF is marked
 * mutable.
 *
 * <p>Equality checks use {@code assertEquals}/{@code assertNotNull} so that a failure
 * reports the actual value instead of a bare "expected true".
 */
@Test
public void testReservedFactor() throws DDFException {
  ddf.setAsFactor("year");
  ddf.setAsFactor("month");

  Assert.assertNotNull(ddf.getSchema());
  System.out.println(">>>>> column class = " + ddf.getColumn("year").getColumnClass());
  System.out.println(">>>>> column class = " + ddf.getColumn("month").getColumnClass());
  Assert.assertEquals(Schema.ColumnClass.FACTOR, ddf.getColumn("year").getColumnClass());
  Assert.assertEquals(Schema.ColumnClass.FACTOR, ddf.getColumn("month").getColumnClass());

  // Add a derived column while the DDF is mutable.
  ddf.setMutable(true);
  ddf = ddf.Transform.transformUDF("test123= round(distance/2, 2)");

  Assert.assertEquals(31, ddf.getNumRows());
  Assert.assertEquals(9, ddf.getNumColumns());
  Assert.assertEquals("test123", ddf.getColumnName(8));
  Assert.assertEquals(9, ddf.VIEWS.head(1).get(0).split("\\t").length);

  // The factor classification and levels must have survived the transformation.
  System.out.println(">>>>> column class = " + ddf.getColumn("year").getColumnClass());
  System.out.println(">>>>> column class = " + ddf.getColumn("month").getColumnClass());
  Assert.assertEquals(Schema.ColumnClass.FACTOR, ddf.getColumn("year").getColumnClass());
  Assert.assertEquals(Schema.ColumnClass.FACTOR, ddf.getColumn("month").getColumnClass());
  Assert.assertTrue(ddf.getColumn("year").getOptionalFactor().getLevels().size() > 0);
  Assert.assertTrue(ddf.getColumn("month").getOptionalFactor().getLevels().size() > 0);
  System.out.println(">>>>>>>>>>>>> " + ddf.getSchema().getColumns());
}
/** * Initialization to be done after constructor assignments, such as setting of the all-important DDFManager. */ protected void initialize(DDFManager manager, Object data, Class<?>[] typeSpecs, String name, Schema schema) throws DDFException { this.validateSchema(schema); this.setManager(manager); // this must be done first in case later stuff needs a manager if (typeSpecs != null) { this.getRepresentationHandler().set(data, typeSpecs); } this.getSchemaHandler().setSchema(schema); if(schema!= null && schema.getTableName() == null) { String tableName = this.getSchemaHandler().newTableName(); schema.setTableName(tableName); } manager.setDDFUUID(this, UUID.randomUUID()); if(!Strings.isNullOrEmpty(name)) manager.setDDFName(this, name); // Facades this.ML = new MLFacade(this, this.getMLSupporter()); this.VIEWS = new ViewsFacade(this, this.getViewHandler()); this.Transform = new TransformFacade(this, this.getTransformationHandler()); this.R = new RFacade(this, this.getAggregationHandler()); }
DDF newddf = null; int numcols = this.getDDF().getNumColumns(); if (columns == null) { columns = this.getDDF().getColumnNames(); newddf = this.getManager().sql2ddf(String.format(sqlCmd, this.getDDF().getTableName()), false); long numrows = this.getDDF().getNumRows(); if (thresh > 0) { if (thresh > numrows) { newddf.getMetaDataHandler().copyFactor(this.getDDF()); return newddf;
@Override public DDF updateInplace(DDF newddf) throws DDFException { //copy content of newddf to this ddf DDF curDDF = this.getDDF(); curDDF.getRepresentationHandler().reset(); curDDF.getRepresentationHandler().setRepresentations(newddf.getRepresentationHandler().getAllRepresentations()); newddf.getMetaDataHandler().copyFactor(this.getDDF()); curDDF.getSchemaHandler().setSchema(newddf.getSchema()); return curDDF; } }
@Override public DDF transformScaleStandard() throws DDFException { Summary[] summaryArr = this.getDDF().getSummary(); List<Column> columns = this.getDDF().getSchema().getColumns(); // Compose a transformation query StringBuffer sqlCmdBuffer = new StringBuffer("SELECT "); for (int i = 0; i < columns.size(); i++) { Column col = columns.get(i); if (!col.isNumeric() || col.getColumnClass() == ColumnClass.FACTOR) { sqlCmdBuffer.append(col.getName()); } else { // subtract mean, divide by stdev sqlCmdBuffer.append(String.format("((%s - %s) / %s) as %s ", col.getName(), summaryArr[i].mean(), summaryArr[i].stdev(), col.getName())); } sqlCmdBuffer.append(","); } sqlCmdBuffer.setLength(sqlCmdBuffer.length() - 1); sqlCmdBuffer.append("FROM ").append(this.getDDF().getTableName()); DDF newddf = this.getManager().sql2ddf(sqlCmdBuffer.toString(), false); newddf.getMetaDataHandler().copyFactor(this.getDDF()); return newddf; }
ddf.setMutable(true); ddf = ddf.Transform.transformUDF("dist= round(distance/2, 2)"); Assert.assertEquals(31, ddf.getNumRows()); Assert.assertEquals(9, ddf.getNumColumns()); Assert.assertEquals("dist", ddf.getColumnName(8)); Assert.assertEquals(9, ddf.VIEWS.head(1).get(0).split("\\t").length); Assert.assertEquals(31, ddf.getNumRows()); Assert.assertEquals(10, ddf.getNumColumns()); Assert.assertEquals(10, ddf.getSummary().length); Assert.assertEquals(31, ddf.getNumRows()); Assert.assertEquals(5, ddf.getNumColumns()); Assert.assertEquals("speed", ddf.getColumnName(4)); ddf.setMutable(false); Assert.assertEquals(31, ddf3.getNumRows()); Assert.assertEquals(6, ddf3.getNumColumns()); Assert.assertEquals("speed", ddf3.getColumnName(5)); Assert.assertEquals(6, ddf3.getSummary().length); TransformationHandler.RToSqlUdf(s3)); DDF ddf2 = ddf.Transform.transformUDF(s1, lcols); Assert.assertEquals(31, ddf2.getNumRows()); Assert.assertEquals(6, ddf2.getNumColumns());
/**
 * Runs a {@code SELECT ... FROM {1}} query whose select list is produced by
 * {@code RToSqlUdf} from the given expressions, the given column names and this DDF's
 * schema columns.
 *
 * <p>If this handler's DDF is mutable, it is updated in place with the query result and
 * returned. Otherwise {@code copyFactor} is called on the result's metadata handler with
 * this DDF, and the result is returned.
 *
 * @param RExps   expressions passed to {@code RToSqlUdf}
 * @param columns column names passed to {@code RToSqlUdf}
 * @return the updated current DDF when it is mutable, otherwise the newly created DDF
 * @throws DDFException if building or running the query fails
 */
public DDF transformUDF(List<String> RExps, List<String> columns) throws DDFException {
  DDF source = this.getDDF();

  String sqlCmd = String.format("SELECT %s FROM %s",
      RToSqlUdf(RExps, columns, source.getSchema().getColumns()), "{1}");
  SQLDataSourceDescriptor descriptor =
      new SQLDataSourceDescriptor(sqlCmd, null, null, null, source.getUUID().toString());
  DDF newddf = this.getManager().sql2ddf(sqlCmd, descriptor);

  if (!source.isMutable()) {
    newddf.getMetaDataHandler().copyFactor(source);
    return newddf;
  }
  return source.updateInplace(newddf);
}
/**
 * Exercises {@code dropNA} with its default arguments, by column, and through the missing
 * data handler with {@code NAChecking.ALL}, comparing the resulting row and column counts
 * with the expected fixture values.
 */
@Test
public void testDropNA() throws DDFException {
  DDF rowsDropped = ddf.dropNA();
  DDF columnsDropped = ddf.dropNA(Axis.COLUMN);

  Assert.assertEquals(9, rowsDropped.getNumRows());
  Assert.assertEquals(22, columnsDropped.getNumColumns());

  DDF allNaColumnsDropped = ddf.getMissingDataHandler().dropNA(Axis.COLUMN, NAChecking.ALL, 0, null);
  Assert.assertEquals(29, allNaColumnsDropped.getNumColumns());
}
public synchronized void setDDFUUID(DDF ddf, UUID uuid) throws DDFException { if(this.hasDDF(uuid)) { throw new DDFException(String.format("DDF with uuid %s already exists", uuid)); } else { //remove old key UUID prevUUID = ddf.getUUID(); if(prevUUID != null) { mDDFs.remove(prevUUID); } ddf.setUUID(uuid); mDDFs.put(uuid, ddf); if(ddf.getName()!= null) { mNames.remove(ddf.getName()); mNames.put(ddf.getName(), ddf.getUUID()); } } }
/**
 * Releases a DDF: calls {@code uncacheAll()} and then {@code reset()} on its
 * representation handler, and finally removes it from {@code mDDFCache}.
 *
 * @param ddf the DDF to remove
 * @throws DDFException if any of the delegated calls fails
 */
public void removeDDF(DDF ddf) throws DDFException {
  // Uncache first, then reset the representation handler.
  ddf.getRepresentationHandler().uncacheAll();
  ddf.getRepresentationHandler().reset();

  mDDFCache.removeDDF(ddf);
}
public DDF sql2ddf(String sqlCommand) throws DDFException { try { // sqlCommand = sqlCommand.replace("@this", this.getTableName()); sqlCommand = sqlCommand.replace("@this", "{1}"); sqlCommand = String.format(sqlCommand, "{1}"); SQLDataSourceDescriptor sqlDS = new SQLDataSourceDescriptor(sqlCommand, null, null,null, this.getUUID().toString()); return this.getManager().sql2ddf(sqlCommand, null, sqlDS); // return this.getManager().sql2ddf(sqlCommand); } catch (Exception e) { throw new DDFException(String.format("Error executing queries for ddf %s", this.getTableName()), e); } }
@Ignore public void testTransformMapReduceNative() throws DDFException { // aggregate sum of month group by year String mapFuncDef = "function(part) { keyval(key=part$year, val=part$month) }"; String reduceFuncDef = "function(key, vv) { keyval.row(key=key, val=sum(vv)) }"; DDF newddf = ddf.Transform.transformMapReduceNative(mapFuncDef, reduceFuncDef); System.out.println("name " + ddf.getName()); System.out.println("newname " + newddf.getName()); Assert.assertNotNull(newddf); Assert.assertTrue(newddf.getColumnName(0).equals("key")); Assert.assertTrue(newddf.getColumnName(1).equals("val")); Assert.assertTrue(newddf.getSchemaHandler().getColumns().get(0).getType() == ColumnType.STRING); Assert.assertTrue(newddf.getSchemaHandler().getColumns().get(1).getType() == ColumnType.INT); }
/**
 * Uses the name as the string form of this object.
 *
 * @return whatever {@code getName()} returns, with no substitution applied
 */
@Override
public String toString() {
  String label = this.getName();
  return label;
}
/**
 * Delegates to the schema handler's {@code setAsFactor} for the given column.
 *
 * @param columnName name of the column, passed through unchanged
 * @return the {@link Factor} returned by the schema handler
 */
public Factor<?> setAsFactor(String columnName) {
  Factor<?> factor = this.getSchemaHandler().setAsFactor(columnName);
  return factor;
}
DDF newddf = ddf.binning("dayofweek", "EQUALINTERVAL", 2, null, true, true); Assert.assertEquals(ColumnClass.FACTOR, newddf.getSchemaHandler().getColumn("dayofweek").getColumnClass()); Assert.assertEquals(2, newddf.getSchemaHandler().getColumn("dayofweek").getOptionalFactor().getLevelMap().size()); DDF newddf2 = ddf.binning("dayofweek", "EQUAlFREQ", 2, null, true, true); Assert.assertEquals(ColumnClass.FACTOR, newddf2.getSchemaHandler().getColumn("dayofweek").getColumnClass()); Assert.assertEquals(2, newddf2.getSchemaHandler().getColumn("dayofweek").getOptionalFactor().getLevelMap().size()); DDF ddf1 = ddf.binning("month", "custom", 0, new double[] { 2, 4, 6, 8 }, true, true); Assert.assertTrue(ddf1.getSchemaHandler().getColumn("month").getColumnClass() == ColumnClass.FACTOR); ddf1.getSchemaHandler().computeFactorLevelsAndLevelCounts(); Assert.assertTrue(ddf1.getSchemaHandler().getColumn("month").getOptionalFactor().getLevelMap().get("[2,4]") > 0); Assert.assertEquals(ddf1.getSchemaHandler().getColumn("month").getOptionalFactor().getLevelCounts().get("[2,4]"), 6, 0); Assert.assertFalse(Strings.isNullOrEmpty(newddf.sql("select dayofweek from @this", "").getRows().get(0))); Assert.assertFalse(Strings.isNullOrEmpty(ddf1.sql("select month from @this", "").getRows().get(0))); Column col = ddf1.getSchemaHandler().getColumn("month"); MetaInfo mi = new MetaInfo(col.getName(), col.getType().toString().toLowerCase()); mi = mi.setFactor(col.getOptionalFactor().getLevelMap()); Assert.assertTrue(mi.hasFactor()); MetaInfo[] m = generateMetaInfo(newddf.getSchema()); for (int i = 0; i < m.length; i++) { if (m[i].getHeader().equals("dayofweek")) {