@Test public void demo() { final String EXAMPLE = "Name\tColor\tServing Size (g)\n" + "String\tCategory\tDouble\n" + "Banana\tYellow\t118\n" + "Blueberry\tBlue\t148\n" + "Lemon\tYellow\t83\n" + "Apple\tGreen\t182"; DataFrame dataFrame = Parser.tsv(new StringReader(EXAMPLE)); // Lookup typed identifiers by column index final StringColumnId NAME = dataFrame.getColumnId(0, ColumnType.STRING); final CategoryColumnId COLOR = dataFrame.getColumnId(1, ColumnType.CATEGORY); final DoubleColumnId SERVING_SIZE = dataFrame.getColumnId(2, ColumnType.DOUBLE); // Use identifier to access columns & values StringColumn nameColumn = dataFrame.getColumn(NAME); IndexedSeq<String> nameValues = nameColumn.getValues(); // ... or access individual values via row index / column id String yellow = dataFrame.getValueAt(2, COLOR); }
/**
 * Checks that a generic column holding {@link File} values can be placed in a
 * data frame and read back, both as a whole column and as a single value.
 */
@Test
public void customColumnTypes() {
    File first = new File("/path/to/a.txt");
    File second = new File("/path/to/b.txt");

    // The identifier carries a column type created on the spot for this test
    GenericColumnId fileId =
            new GenericColumnId("File", new ColumnType<>("File", GenericColumnId.class));
    GenericColumn<File, GenericColumnId> expectedColumn =
            GenericColumn.ofAll(fileId, first, second);

    DataFrame frame = DataFrame.of(expectedColumn);

    // Two values in one column
    assertEquals(2, frame.getRowCount());
    assertEquals(1, frame.getColumnCount());

    // The column read back via the identifier must equal the one put in
    GenericColumn<File, GenericColumnId> actualColumn = frame.getColumn(fileId);
    assertEquals(expectedColumn, actualColumn);

    // Row index 1 holds the second file
    File secondRowValue = frame.getValueAt(1, fileId);
    assertEquals(second, secondRowValue);
}
// Fetch each column from the frame through its typed identifier and compare
// its contents with the expected values.
// NOTE(review): `df` and the *ColumnId variables are declared outside this excerpt.

// Name column: all values, in order
StringColumn nameColumn = df.getColumn(nameColumnId);
assertEquals(Array.of("Ada", "Homer", "Hillary"), nameColumn.getValues());

// Age column: compared as a primitive int array
IntColumn ageColumn = df.getColumn(ageColumnId);
assertArrayEquals(new int[]{42, 99, 67}, ageColumn.valueStream().toArray());

// Height column: compared as a double array with a tolerance of 0.01
DoubleColumn heightColumn = df.getColumn(heightColumnId);
assertArrayEquals(new double[]{1.74, 1.20, 1.70}, heightColumn.valueStream().toArray(), 0.01);

// Vegetarian column: boolean values, in order
BooleanColumn vegetarianColumn = df.getColumn(vegetarianColumnId);
assertEquals(Array.of(true, false, false), vegetarianColumn.valueStream().toArray());

// Date-of-birth column: only the month of each timestamp, taken in UTC, is checked
TimestampColumn dateOfBirthColumn = df.getColumn(dateOfBirthColumnId);
Function<? super Instant, Month> toMonth = instant -> instant.atZone(ZoneId.from(ZoneOffset.UTC)).getMonth();
assertEquals(asList(Month.AUGUST, Month.JANUARY, Month.OCTOBER), dateOfBirthColumn.getValues().map(toMonth).toJavaList());

// Gender column: the expected categories are given as a set
CategoryColumn genderColumn = df.getColumn(genderColumnId);
assertEquals(HashSet.of("Female", "Male"), genderColumn.getCategories());
// For each column: the column stored in the frame under its identifier must
// equal the expected column object, and the column's contents are checked too.
// NOTE(review): the column variables, `df`, the identifier constants and the
// date constants are declared outside this excerpt.

// Name
assertEquals(nameColumn, df.getColumn(NAME));
assertEquals(Array.of("Ada", "Homer", "Hillary"), nameColumn.getValues());

// Age
assertEquals(ageColumn, df.getColumn(AGE));
assertArrayEquals(new int[]{42, 99, 67}, ageColumn.valueStream().toArray());

// Height (double comparison with a tolerance of 0.01)
assertEquals(heightColumn, df.getColumn(HEIGHT));
assertArrayEquals(new double[]{1.74, 1.20, 1.70}, heightColumn.valueStream().toArray(), 0.01);

// Vegetarian
assertEquals(vegetarianColumn, df.getColumn(VEGETARIAN));
assertEquals(Array.of(true, false, false), vegetarianColumn.valueStream().toArray());

// Date of birth
assertEquals(dateOfBirthColumn, df.getColumn(DATE_OF_BIRTH));
assertEquals(Array.of(AUG_26_1975, JAN_08_2006, OCT_26_1947), dateOfBirthColumn.getValues());

// Gender
assertEquals(genderColumn, df.getColumn(GENDER));
assertEquals(HashSet.of("Female", "Male"), genderColumn.getCategories());

// A column looked up via `nonSpecificId` is only known as Column<?>;
// it must exist and carry the meta-data entry "meta-data" -> "rocks"
Column<?> column = df.getColumn(nonSpecificId);
assertNotNull(column);
assertEquals(Option.some("rocks"), column.getMetaData().get("meta-data"));
/**
 * Asserts the meta data expected on the given frame: exactly one frame-level
 * entry ({@code author} = {@code netzwerg}) and exactly one entry
 * ({@code unit} = {@code m}) on the double column at index 2.
 *
 * @param df the data frame whose meta data is checked
 */
private static void assertMetaDataParsedCorrectly(DataFrame df) {
    // Frame-level meta data
    Map<String, String> frameMeta = df.getMetaData();
    assertEquals(1, frameMeta.size());
    assertEquals(Option.of("netzwerg"), frameMeta.get("author"));

    // Meta data attached to the double column at index 2
    Map<String, String> unitMeta = df
            .getColumn(df.getColumnId(2, ColumnType.DOUBLE))
            .getMetaData();
    assertEquals(1, unitMeta.size());
    assertEquals(Option.of("m"), unitMeta.get("unit"));
}